diff --git a/src/lib.rs b/src/lib.rs
index bbdacda..fc01fa2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -241,6 +241,7 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
+    let mut chromium_events: Vec<serde_json::Value> = Vec::new();
     while let Some((lineno, line)) = iter.next() {
         bytes_read += line.len() as u64;
@@ -438,6 +439,10 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
@@ ... @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
     if stats.fail_parser > 0 {
         eprintln!(
@@ -485,6 +495,7 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput>
diff --git a/src/parsers.rs b/src/parsers.rs
     fn parse<'e>(
         &self,
-        lineno: usize,
+        _lineno: usize,
         metadata: Metadata<'e>,
         _rank: Option<u32>,
-        compile_id: &Option<CompileId>,
+        _compile_id: &Option<CompileId>,
         payload: &str,
     ) -> anyhow::Result<ParserResults> {
         if let Metadata::DumpFile(metadata) = metadata {
diff --git a/src/templates.rs b/src/templates.rs
index de1e77d..73b2df1 100644
--- a/src/templates.rs
+++ b/src/templates.rs
@@ -124,6 +124,12 @@ phase generates:
 <li>Inductor will apply some post grad FX passes, producing <code>inductor_post_grad_graph</code></li>
 <li>Inductor will perform code generation, producing the final <code>inductor_output_code</code> which will be executed at runtime. This output is a valid Python program and can be directly run.</li>
 </ul>
+
+{{ if has_chromium_events }}
+<h2>Chromium Events</h2>
+PT2 generates Chromium Trace Events in JSON on specific events during compilation.
+You can download and view them in a tool like Perfetto.
+{{ endif }}
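The trace events referenced by this template block are plain Chromium Trace Event objects, exactly the kind visible in the test log below: "ph" is "B" (begin) or "E" (end), "ts" is microseconds, and a top-level JSON array of them is already a valid trace file. A minimal sketch of what an aggregated chromium_events.json holds (illustrative Rust, not tlparse's actual code; the variable name is an assumption):

    use serde_json::{json, Value};

    fn main() {
        // Two paired events of the shape emitted by torch/_dynamo/utils.py:
        // a "B" event opens a span and a matching "E" event closes it.
        let mut chromium_events: Vec<Value> = Vec::new();
        chromium_events.push(json!({
            "name": "_compile.compile_inner",
            "ts": 1722977751347358.2,
            "args": null,
            "ph": "B",
            "pid": 0
        }));
        chromium_events.push(json!({
            "name": "_compile.compile_inner",
            "ts": 1722977751355078.8,
            "args": null,
            "ph": "E",
            "pid": 0
        }));
        // A JSON array of such objects can be opened directly in Perfetto
        // or chrome://tracing.
        println!("{}", serde_json::to_string_pretty(&chromium_events).unwrap());
    }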

 
 Build products below:
 
@@ -139,6 +145,7 @@ Build products below:
 {{ endfor }}
+{{ if has_unknown_stack_trie }}
 <h2>Unknown stacks</h2>
 
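The {{ if has_chromium_events }} and {{ if has_unknown_stack_trie }} blocks above are conditionals in the tinytemplate syntax these templates use, keyed off boolean fields of the context the index page is rendered with (see has_chromium_events on IndexContext below). A minimal sketch of the mechanism, assuming the tinytemplate crate and a stand-in context struct rather than tlparse's actual IndexContext:

    use serde::Serialize;
    use tinytemplate::TinyTemplate;

    #[derive(Serialize)]
    struct Context {
        has_chromium_events: bool,
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let mut tt = TinyTemplate::new();
        // `{{ if ... }}` renders its body only when the context field is true.
        tt.add_template(
            "index",
            "{{ if has_chromium_events }}<h2>Chromium Events</h2>{{ endif }}",
        )?;
        let html = tt.render("index", &Context { has_chromium_events: true })?;
        assert_eq!(html, "<h2>Chromium Events</h2>");
        Ok(())
    }

The section only shows up in the rendered index when the parser actually saw chromium_event entries in the log.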
diff --git a/src/types.rs b/src/types.rs
index 439c06a..8f54b45 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -452,6 +452,7 @@ pub struct Envelope {
     pub describe_tensor: Option<TensorDesc>,
     pub describe_source: Option<SourceDesc>,
     pub dump_file: Option<DumpFileMetadata>,
+    pub chromium_event: Option<EmptyMetadata>,
     #[serde(flatten)]
     pub _other: FxHashMap<String, serde_json::Value>,
 }
@@ -561,6 +562,7 @@ pub struct IndexContext {
     pub has_unknown_stack_trie: bool,
     pub num_breaks: usize,
     pub custom_header_html: String,
+    pub has_chromium_events: bool,
 }
 
 #[derive(Debug, Serialize)]
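Each line of the test log below is one JSON envelope of this shape: typically exactly one of the optional metadata fields is populated, and has_payload carries the MD5 of the indented payload lines that follow. A minimal sketch of how a chromium_event line deserializes (illustrative field types, not tlparse's exact definitions):

    use serde::Deserialize;
    use std::collections::HashMap;

    #[derive(Debug, Deserialize)]
    struct Envelope {
        // Present (as an empty object) on chromium_event lines; the trace
        // event JSON itself arrives in the payload lines underneath.
        chromium_event: Option<HashMap<String, serde_json::Value>>,
        // MD5 checksum of the payload that follows this envelope line.
        has_payload: Option<String>,
        // Catch-all (frame_id, attempt, ...) so unknown keys don't fail the parse.
        #[serde(flatten)]
        other: HashMap<String, serde_json::Value>,
    }

    fn main() -> serde_json::Result<()> {
        let line = r#"{"chromium_event": {}, "has_payload": "987ad3072722930cb3b501d122b778d3"}"#;
        let env: Envelope = serde_json::from_str(line)?;
        assert!(env.chromium_event.is_some());
        assert_eq!(env.has_payload.as_deref(), Some("987ad3072722930cb3b501d122b778d3"));
        Ok(())
    }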
diff --git a/tests/inputs/chromium_nanogpt_cache_miss.log b/tests/inputs/chromium_nanogpt_cache_miss.log
new file mode 100644
index 0000000..eb36139
--- /dev/null
+++ b/tests/inputs/chromium_nanogpt_cache_miss.log
@@ -0,0 +1,54323 @@
+V0806 13:55:51.342000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V0806 13:55:51.343000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", 1]}
+V0806 13:55:51.347000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/benchmarks/dynamo/common.py", 2]}
+V0806 13:55:51.347000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/_dynamo/eval_frame.py", 3]}
+V0806 13:55:51.347000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 436, "name": "forward_and_backward_pass", "filename": 1}]}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.347000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "987ad3072722930cb3b501d122b778d3"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751347358.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.347000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "b620def9ef9967d776f34efe72aa81c2"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751347454.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.354000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "7a0a797a9f90400fd9294a20d8172183"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None  # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['inputs'], accessed_by=DictGetItemGuardAccessor(inputs)
+ | | +- TYPE_MATCH: ___check_type_id(L['inputs'], 94206128741824)
+ | | +- LENGTH_CHECK: len(L['inputs']) == 1
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['clone_inputs'], accessed_by=DictGetItemGuardAccessor(clone_inputs)
+ | | | +- ID_MATCH: ___check_obj_id(G['clone_inputs'], 140561895812272)
+
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a1b3b6de20677769e59e37a7f0b2ff44"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751355009.0,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "29faf91a8e7dc2fd407f0cd8c9138e40"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751355078.8,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.355000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "0/0", "frame_key": "1", "co_name": "forward_and_backward_pass", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 436, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977751.3473148, "entire_frame_compile_time_s": 0.007780313491821289, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function clone_inputs in file /data/users/jjwu/a/pytorch/torch/_dynamo/utils.py'"], "dynamo_time_before_restart_s": 0.0032989978790283203, "has_guarded_code": true}, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V0806 13:55:51.355000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 437, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}]}, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.356000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "852a1ce2f4400e662ec69b85aded0d21"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751356089.5,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.356000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "bf233ab38e77bab91b061da69473da43"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751356160.5,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "b5d988194b7254d348ea6c2f189958d2"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None  # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod)
+ | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328)
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+ | +- GuardManager: source=L['___stack0'], accessed_by=DictGetItemGuardAccessor(___stack0)
+ | | +- TYPE_MATCH: ___check_type_id(L['___stack0'], 94206128766016)
+ | | +- LENGTH_CHECK: len(L['___stack0']) == 1
+
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "59b4092dd5f70a1dae78d9cdde0701a6"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751362494.5,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e19fed31c91f5fe9e56a180c1627d3e8"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751362564.2,
+   "args": null,
+   "ph": "E",
+   "pid": 0
+ }
+V0806 13:55:51.362000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "1/0", "frame_key": "2", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 437, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 8, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977751.3560677, "entire_frame_compile_time_s": 0.006527423858642578, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'inline in skipfiles: BenchmarkRunner.optimizer_zero_grad | inner /data/users/jjwu/a/pytorch/torch/_compile.py, skipped according trace_rules.lookup SKIP_DIRS'"], "dynamo_time_before_restart_s": 0.001367807388305664, "has_guarded_code": true}, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V0806 13:55:51.363000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}]}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.363000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c96559249d58820ff5a3a4d087e4c79e"}
+ {
+   "name": "_compile.compile_inner",
+   "ts": 1722977751363510.8,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.363000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "213a253af88886478de090f7196d7e90"}
+ {
+   "name": "entire_frame_compile",
+   "ts": 1722977751363579.2,
+   "args": null,
+   "ph": "B",
+   "pid": 0
+ }
+V0806 13:55:51.366000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 4, "size": 760}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.367000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.int64", "device": "device(type='cuda', index=0)", "size": [1, 64], "is_leaf": true, "stride": [64, 1], "storage": 0, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.367000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 0, "source": "L['cloned_inputs'][0]"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 4, "size": 154533888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.373000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 2, "source": "L['mod']._modules['transformer']._modules['wte']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.376000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 4, "size": 3145728}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.376000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.377000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 3, "source": "L['mod']._modules['transformer']._modules['wpe']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 6, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 3, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.386000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 6, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 7, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 7, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 5, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.392000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 11, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.393000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 12, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 7, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.410000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 36, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 8, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.411000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 37, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 9, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.417000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 44, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.418000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 45, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 11, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.422000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 49, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 50, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 12, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.423000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 50, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 13, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.434000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 66, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.435000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 67, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.441000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 71, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 72, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 17, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.447000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 76, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 77, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.448000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 77, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 78, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 19, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.457000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 78, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 79, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 20, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.458000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 79, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 21, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.462000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 80, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.463000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 81, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.466000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 23, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 85, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 86, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 24, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.467000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 86, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 87, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 87, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.474000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 88, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.474000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 88, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 89, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 90, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.484000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 29, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 94, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 95, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 95, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.494000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 96, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 31, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 96, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 97, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 32, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.496000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 97, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 33, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 98, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 99, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 35, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 103, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 104, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 36, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.505000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 104, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 105, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 37, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 105, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 106, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 106, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 107, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 108, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 41, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 112, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 113, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 113, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 114, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 43, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 114, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 115, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 44, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 115, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 45, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 116, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.538000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 117, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 47, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.541000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 121, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 122, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 48, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 122, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 123, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 123, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 124, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 124, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 125, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 126, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 53, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 130, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.560000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 131, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.560000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 131, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 132, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 55, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.569000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 132, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 133, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 56, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 133, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 57, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 134, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 135, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 59, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 139, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 140, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 60, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.579000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 140, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 141, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 61, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 141, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 142, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V0806 13:55:51.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 142,
"source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.591000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 143, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 144, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.592000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 144, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 65, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.596000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 148, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 149, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 149, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 150, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 67, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 150, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 151, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 68, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.607000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 151, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 69, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 152, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 153, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 71, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 157, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 158, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 72, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.617000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 158, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 159, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 159, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 160, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.623000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 160, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 161, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.629000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 162, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 77, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 166, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 167, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 167, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 168, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 79, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 168, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 169, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 80, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 169, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 81, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.648000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 170, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 171, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 83, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 175, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 176, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 84, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 176, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 177, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 85, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.659000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 177, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 178, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 178, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 179, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.666000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 180, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 89, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 184, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 185, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 185, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 186, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 91, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 186, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 187, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 92, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 187, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 93, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 188, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 189, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 95, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 193, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 194, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 96, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 194, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 195, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 195, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 196, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 196, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.702000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 197, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 198, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 101, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 202, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 203, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.709000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 203, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 204, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 103, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 204, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 205, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 104, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.719000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 205, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 105, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 206, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 207, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 107, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 211, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 212, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 108, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.729000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 212, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 213, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 109, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 213, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.734000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.735000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 214, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.735000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 214, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 215, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.740000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.741000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 112, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.741000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 216, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 113, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.745000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 220, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 221, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.746000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 221, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 222, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 115, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.755000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 222, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 223, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 116, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.756000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 223, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 224, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 117, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.760000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 224, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 225, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.761000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 225, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 229, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 119, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 229, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.765000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.766000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 230, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 120, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.766000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 230, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 231, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.771000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 231, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 232, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.772000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 232, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 233, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.777000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 233, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 234, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.778000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 234, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.782000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 238, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 125, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 238, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.783000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 239, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.784000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 239, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.792000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 240, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 127, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 240, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 241, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 128, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.793000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 241, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 242, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 129, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.797000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 242, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 243, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.798000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 243, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 247, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 131, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.802000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 247, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 248, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 132, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.803000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 248, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 249, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 133, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.808000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 249, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 250, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.809000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 250, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 251, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.814000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 251, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 252, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.815000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 252, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.819000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 4, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.819000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 256, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 137, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 256, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 4, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 257, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.820000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 257, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 4, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 258, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 139, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.829000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 258, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 259, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 140, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.830000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 259, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 260, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 141, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.834000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 260, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 261, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.835000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 261, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 265, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 143, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.839000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 265, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 4, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 266, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 144, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.840000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 266, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 4, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 267, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.845000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 267, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 268, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.846000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 268, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 269, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.850000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 269, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 4, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 270, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "view_func": "", "describer_id": 4}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.851000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 4, "id": 270, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 0} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 5, "size": 760}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.int64", "device": "device(type='cuda', index=0)", "size": [1, 64], "is_leaf": true, "stride": [64, 1], "storage": 0, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.863000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 0, "source": "L['cloned_inputs'][0]"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 5, "size": 154533888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.868000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 1, "source": "L['mod']._modules['transformer']._modules['wte']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 5, "size": 3145728}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} 
+V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.870000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 2, "source": "L['mod']._modules['transformer']._modules['wpe']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 3, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.877000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 3, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 4, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.878000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 4, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 8, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 5, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.883000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 8, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 9, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", 
"size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.884000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 9, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 10, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 7, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.893000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 10, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 8, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.894000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 11, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 9, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.898000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 12, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 13, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, 
"view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.899000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 13, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.902000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 17, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 11, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 17, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.903000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.904000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 18, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 12, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.904000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 18, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 19, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 13, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.909000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 19, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 20, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 
13:55:51.910000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 20, "source": "L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 21, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.915000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 21, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 22, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.916000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 22, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 26, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 17, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.920000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 26, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 27, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.921000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 27, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 28, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 19, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.930000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 28, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 29, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 20, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.931000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 29, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 30, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 21, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.935000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 30, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 31, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.936000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 31, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 35, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 23, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.940000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 35, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 24, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.941000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 36, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.946000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 37, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 38, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.947000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 38, "source": "L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 
4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 39, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.952000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 39, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 40, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.953000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 40, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 29, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.957000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 44, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.958000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 45, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 5, "size": 2359296}, "frame_id": 
2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 46, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 31, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.967000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 46, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 47, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 32, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.968000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 47, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 48, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 33, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.972000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 48, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.973000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 49, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 53, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 35, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.977000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 53, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 54, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 36, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.978000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 54, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 55, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 37, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.983000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 55, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 56, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.984000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 56, "source": "L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.989000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 57, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', 
index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 57, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.990000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.991000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 58, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.991000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 58, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 62, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 41, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.995000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 62, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 63, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:51.996000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 63, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 64, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], 
"storage": 43, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.005000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 64, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 65, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 44, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.006000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 65, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 45, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.010000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 66, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.011000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 67, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 47, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 71, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 48, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.016000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 72, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 73, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 73, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 74, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.022000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 74, "source": "L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 75, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.027000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 
75, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.028000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 76, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 53, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 80, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.033000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 81, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 82, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 55, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.042000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 82, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 83, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 56, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 83, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 84, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 57, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.047000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 84, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 85, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 59, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 89, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 60, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.053000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 90, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 91, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 61, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 91, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 92, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 92, "source": "L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 93, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 93, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.064000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 94, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 65, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.069000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 98, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 99, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 100, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 67, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 100, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 5, 
"size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 101, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 68, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.080000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 101, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 102, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 69, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.084000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 102, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.085000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 103, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.088000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.088000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 71, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 107, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 72, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.089000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 108, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.094000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 109, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 109, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.095000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 110, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.096000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 110, "source": "L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 111, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.101000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 111, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.102000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 112, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 77, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.106000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 116, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.107000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 117, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 118, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 79, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.116000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 118, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 119, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": 
true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 80, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.117000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 119, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 120, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 81, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.121000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 120, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.122000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 121, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 83, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.126000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 125, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 84, "view_func": "", "describer_id": 5}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.127000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 126, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 127, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 85, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.132000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 127, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 128, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.133000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 128, "source": "L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 129, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.138000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 129, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.139000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 130, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 89, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.143000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 134, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.144000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 135, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 136, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 91, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.153000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 136, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 137, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 92, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.154000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 
137, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 138, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 93, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.158000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 138, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.159000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 139, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 95, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.163000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 143, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 144, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 96, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.164000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 144, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 145, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.169000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 145, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 146, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.170000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 146, "source": "L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 147, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.175000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 147, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.176000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 148, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias']"}, 
"frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 101, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.180000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 152, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.181000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 153, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 154, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 103, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.190000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 154, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 155, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 104, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.191000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 155, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 
13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 156, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 105, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.195000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 156, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.196000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 157, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 107, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.200000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 161, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 108, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.201000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 162, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 5, 
"size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 163, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 109, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.206000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 163, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 164, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.207000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 164, "source": "L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 165, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.212000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 165, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 112, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.213000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 166, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.217000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 113, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 170, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.218000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.219000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.219000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 171, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.227000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 172, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 115, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 172, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 173, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 116, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.228000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 173, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 174, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 117, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.232000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 174, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.233000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 175, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 119, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.237000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 179, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 120, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.238000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 180, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 181, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], 
"is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.243000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 181, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 182, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.244000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 182, "source": "L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 183, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.249000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 183, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.250000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 184, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 125, 
"view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.254000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 188, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.255000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 189, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 190, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 127, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.264000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 190, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 191, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 128, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.265000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 191, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 192, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 129, "view_func": "", "describer_id": 5}, "frame_id": 2, 
"frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.269000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 192, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.270000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 193, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 131, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.274000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 197, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 132, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.275000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 198, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 199, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 133, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.280000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 199, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 200, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.281000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 200, "source": "L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 201, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.286000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 201, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.287000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 202, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 5, "size": 7077888}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 137, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.291000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 206, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 5, "size": 9216}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.292000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 207, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 5, "size": 2359296}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 208, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 139, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.301000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 208, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 209, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 140, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.302000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 209, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 210, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 141, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.306000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 210, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.307000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 211, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.310000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.310000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 143, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 215, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 5, "size": 12288}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.311000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 144, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.312000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 216, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.316000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 5, "size": 9437184}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 217, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 217, "source": 
"L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.317000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.318000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 218, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.318000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 218, "source": "L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.321000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 219, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 219, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['weight']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.322000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 5, "size": 3072}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.323000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "view_func": "", "describer_id": 5}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.323000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 5, "id": 220, "source": "L['mod']._modules['transformer']._modules['ln_f']._parameters['bias']"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:55:52.360000 4107173 torch/_dynamo/output_graph.py:1337] {"dynamo_output_graph": {"sizes": {"l_cloned_inputs_0_": [1, 64], "l_mod_modules_transformer_modules_wte_parameters_weight_": [50304, 768], "l_mod_modules_transformer_modules_wpe_parameters_weight_": [1024, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_": [768], 
"l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_": [2304, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_": [2304], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_": [768, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_": [768], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_": [3072, 768], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_": [3072], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_": [768, 3072], "l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_": [768], "l_mod_modules_transformer_modules_ln_f_parameters_weight_": [768], "l_mod_modules_transformer_modules_ln_f_parameters_bias_": [768], "arange": [64], "pos": [1, 64], "tok_emb": [1, 64, 768], "pos_emb": [1, 64, 768], "add": [1, 64, 768], "x": [1, 64, 768], "layer_norm": [1, 64, 768], "linear": [1, 64, 2304], "q": [1, 64, 768], "k": [1, 64, 768], "v": [1, 64, 768], "view": [1, 64, 12, 64], "k_1": [1, 12, 64, 64], "view_1": [1, 64, 12, 64], "q_1": [1, 12, 64, 64], "view_2": [1, 64, 12, 64], "v_1": [1, 12, 64, 64], "y": [1, 12, 64, 64], "transpose_3": [1, 64, 12, 64], 
"contiguous": [1, 64, 12, 64], "y_1": [1, 64, 768], "linear_1": [1, 64, 768], "y_2": [1, 64, 768], "x_1": [1, 64, 768], "layer_norm_1": [1, 64, 768], "x_2": [1, 64, 3072], "mul": [1, 64, 3072], "pow_1": [1, 64, 3072], "mul_1": [1, 64, 3072], "add_2": [1, 64, 3072], "mul_2": [1, 64, 3072], "tanh": [1, 64, 3072], "add_3": [1, 64, 3072], "x_3": [1, 64, 3072], "x_4": [1, 64, 768], "x_5": [1, 64, 768], "x_6": [1, 64, 768], "layer_norm_2": [1, 64, 768], "linear_4": [1, 64, 2304], "q_2": [1, 64, 768], "k_2": [1, 64, 768], "v_2": [1, 64, 768], "view_4": [1, 64, 12, 64], "k_3": [1, 12, 64, 64], "view_5": [1, 64, 12, 64], "q_3": [1, 12, 64, 64], "view_6": [1, 64, 12, 64], "v_3": [1, 12, 64, 64], "y_3": [1, 12, 64, 64], "transpose_7": [1, 64, 12, 64], "contiguous_1": [1, 64, 12, 64], "y_4": [1, 64, 768], "linear_5": [1, 64, 768], "y_5": [1, 64, 768], "x_7": [1, 64, 768], "layer_norm_3": [1, 64, 768], "x_8": [1, 64, 3072], "mul_4": [1, 64, 3072], "pow_2": [1, 64, 3072], "mul_5": [1, 64, 3072], "add_6": [1, 64, 3072], "mul_6": [1, 64, 3072], "tanh_1": [1, 64, 3072], "add_7": [1, 64, 3072], "x_9": [1, 64, 3072], "x_10": [1, 64, 768], "x_11": [1, 64, 768], "x_12": [1, 64, 768], "layer_norm_4": [1, 64, 768], "linear_8": [1, 64, 2304], "q_4": [1, 64, 768], "k_4": [1, 64, 768], "v_4": [1, 64, 768], "view_8": [1, 64, 12, 64], "k_5": [1, 12, 64, 64], "view_9": [1, 64, 12, 64], "q_5": [1, 12, 64, 64], "view_10": [1, 64, 12, 64], "v_5": [1, 12, 64, 64], "y_6": [1, 12, 64, 64], "transpose_11": [1, 64, 12, 64], "contiguous_2": [1, 64, 12, 64], "y_7": [1, 64, 768], "linear_9": [1, 64, 768], "y_8": [1, 64, 768], "x_13": [1, 64, 768], "layer_norm_5": [1, 64, 768], "x_14": [1, 64, 3072], "mul_8": [1, 64, 3072], "pow_3": [1, 64, 3072], "mul_9": [1, 64, 3072], "add_10": [1, 64, 3072], "mul_10": [1, 64, 3072], "tanh_2": [1, 64, 3072], "add_11": [1, 64, 3072], "x_15": [1, 64, 3072], "x_16": [1, 64, 768], "x_17": [1, 64, 768], "x_18": [1, 64, 768], "layer_norm_6": [1, 64, 768], "linear_12": [1, 64, 2304], "q_6": [1, 64, 768], "k_6": [1, 64, 768], "v_6": [1, 64, 768], "view_12": [1, 64, 12, 64], "k_7": [1, 12, 64, 64], "view_13": [1, 64, 12, 64], "q_7": [1, 12, 64, 64], "view_14": [1, 64, 12, 64], "v_7": [1, 12, 64, 64], "y_9": [1, 12, 64, 64], "transpose_15": [1, 64, 12, 64], "contiguous_3": [1, 64, 12, 64], "y_10": [1, 64, 768], "linear_13": [1, 64, 768], "y_11": [1, 64, 768], "x_19": [1, 64, 768], "layer_norm_7": [1, 64, 768], "x_20": [1, 64, 3072], "mul_12": [1, 64, 3072], "pow_4": [1, 64, 3072], "mul_13": [1, 64, 3072], "add_14": [1, 64, 3072], "mul_14": [1, 64, 3072], "tanh_3": [1, 64, 3072], "add_15": [1, 64, 3072], "x_21": [1, 64, 3072], "x_22": [1, 64, 768], "x_23": [1, 64, 768], "x_24": [1, 64, 768], "layer_norm_8": [1, 64, 768], "linear_16": [1, 64, 2304], "q_8": [1, 64, 768], "k_8": [1, 64, 768], "v_8": [1, 64, 768], "view_16": [1, 64, 12, 64], "k_9": [1, 12, 64, 64], "view_17": [1, 64, 12, 64], "q_9": [1, 12, 64, 64], "view_18": [1, 64, 12, 64], "v_9": [1, 12, 64, 64], "y_12": [1, 12, 64, 64], "transpose_19": [1, 64, 12, 64], "contiguous_4": [1, 64, 12, 64], "y_13": [1, 64, 768], "linear_17": [1, 64, 768], "y_14": [1, 64, 768], "x_25": [1, 64, 768], "layer_norm_9": [1, 64, 768], "x_26": [1, 64, 3072], "mul_16": [1, 64, 3072], "pow_5": [1, 64, 3072], "mul_17": [1, 64, 3072], "add_18": [1, 64, 3072], "mul_18": [1, 64, 3072], "tanh_4": [1, 64, 3072], "add_19": [1, 64, 3072], "x_27": [1, 64, 3072], "x_28": [1, 64, 768], "x_29": [1, 64, 768], "x_30": [1, 64, 768], "layer_norm_10": [1, 64, 768], "linear_20": [1, 64, 
2304], "q_10": [1, 64, 768], "k_10": [1, 64, 768], "v_10": [1, 64, 768], "view_20": [1, 64, 12, 64], "k_11": [1, 12, 64, 64], "view_21": [1, 64, 12, 64], "q_11": [1, 12, 64, 64], "view_22": [1, 64, 12, 64], "v_11": [1, 12, 64, 64], "y_15": [1, 12, 64, 64], "transpose_23": [1, 64, 12, 64], "contiguous_5": [1, 64, 12, 64], "y_16": [1, 64, 768], "linear_21": [1, 64, 768], "y_17": [1, 64, 768], "x_31": [1, 64, 768], "layer_norm_11": [1, 64, 768], "x_32": [1, 64, 3072], "mul_20": [1, 64, 3072], "pow_6": [1, 64, 3072], "mul_21": [1, 64, 3072], "add_22": [1, 64, 3072], "mul_22": [1, 64, 3072], "tanh_5": [1, 64, 3072], "add_23": [1, 64, 3072], "x_33": [1, 64, 3072], "x_34": [1, 64, 768], "x_35": [1, 64, 768], "x_36": [1, 64, 768], "layer_norm_12": [1, 64, 768], "linear_24": [1, 64, 2304], "q_12": [1, 64, 768], "k_12": [1, 64, 768], "v_12": [1, 64, 768], "view_24": [1, 64, 12, 64], "k_13": [1, 12, 64, 64], "view_25": [1, 64, 12, 64], "q_13": [1, 12, 64, 64], "view_26": [1, 64, 12, 64], "v_13": [1, 12, 64, 64], "y_18": [1, 12, 64, 64], "transpose_27": [1, 64, 12, 64], "contiguous_6": [1, 64, 12, 64], "y_19": [1, 64, 768], "linear_25": [1, 64, 768], "y_20": [1, 64, 768], "x_37": [1, 64, 768], "layer_norm_13": [1, 64, 768], "x_38": [1, 64, 3072], "mul_24": [1, 64, 3072], "pow_7": [1, 64, 3072], "mul_25": [1, 64, 3072], "add_26": [1, 64, 3072], "mul_26": [1, 64, 3072], "tanh_6": [1, 64, 3072], "add_27": [1, 64, 3072], "x_39": [1, 64, 3072], "x_40": [1, 64, 768], "x_41": [1, 64, 768], "x_42": [1, 64, 768], "layer_norm_14": [1, 64, 768], "linear_28": [1, 64, 2304], "q_14": [1, 64, 768], "k_14": [1, 64, 768], "v_14": [1, 64, 768], "view_28": [1, 64, 12, 64], "k_15": [1, 12, 64, 64], "view_29": [1, 64, 12, 64], "q_15": [1, 12, 64, 64], "view_30": [1, 64, 12, 64], "v_15": [1, 12, 64, 64], "y_21": [1, 12, 64, 64], "transpose_31": [1, 64, 12, 64], "contiguous_7": [1, 64, 12, 64], "y_22": [1, 64, 768], "linear_29": [1, 64, 768], "y_23": [1, 64, 768], "x_43": [1, 64, 768], "layer_norm_15": [1, 64, 768], "x_44": [1, 64, 3072], "mul_28": [1, 64, 3072], "pow_8": [1, 64, 3072], "mul_29": [1, 64, 3072], "add_30": [1, 64, 3072], "mul_30": [1, 64, 3072], "tanh_7": [1, 64, 3072], "add_31": [1, 64, 3072], "x_45": [1, 64, 3072], "x_46": [1, 64, 768], "x_47": [1, 64, 768], "x_48": [1, 64, 768], "layer_norm_16": [1, 64, 768], "linear_32": [1, 64, 2304], "q_16": [1, 64, 768], "k_16": [1, 64, 768], "v_16": [1, 64, 768], "view_32": [1, 64, 12, 64], "k_17": [1, 12, 64, 64], "view_33": [1, 64, 12, 64], "q_17": [1, 12, 64, 64], "view_34": [1, 64, 12, 64], "v_17": [1, 12, 64, 64], "y_24": [1, 12, 64, 64], "transpose_35": [1, 64, 12, 64], "contiguous_8": [1, 64, 12, 64], "y_25": [1, 64, 768], "linear_33": [1, 64, 768], "y_26": [1, 64, 768], "x_49": [1, 64, 768], "layer_norm_17": [1, 64, 768], "x_50": [1, 64, 3072], "mul_32": [1, 64, 3072], "pow_9": [1, 64, 3072], "mul_33": [1, 64, 3072], "add_34": [1, 64, 3072], "mul_34": [1, 64, 3072], "tanh_8": [1, 64, 3072], "add_35": [1, 64, 3072], "x_51": [1, 64, 3072], "x_52": [1, 64, 768], "x_53": [1, 64, 768], "x_54": [1, 64, 768], "layer_norm_18": [1, 64, 768], "linear_36": [1, 64, 2304], "q_18": [1, 64, 768], "k_18": [1, 64, 768], "v_18": [1, 64, 768], "view_36": [1, 64, 12, 64], "k_19": [1, 12, 64, 64], "view_37": [1, 64, 12, 64], "q_19": [1, 12, 64, 64], "view_38": [1, 64, 12, 64], "v_19": [1, 12, 64, 64], "y_27": [1, 12, 64, 64], "transpose_39": [1, 64, 12, 64], "contiguous_9": [1, 64, 12, 64], "y_28": [1, 64, 768], "linear_37": [1, 64, 768], "y_29": [1, 64, 768], "x_55": [1, 64, 
768], "layer_norm_19": [1, 64, 768], "x_56": [1, 64, 3072], "mul_36": [1, 64, 3072], "pow_10": [1, 64, 3072], "mul_37": [1, 64, 3072], "add_38": [1, 64, 3072], "mul_38": [1, 64, 3072], "tanh_9": [1, 64, 3072], "add_39": [1, 64, 3072], "x_57": [1, 64, 3072], "x_58": [1, 64, 768], "x_59": [1, 64, 768], "x_60": [1, 64, 768], "layer_norm_20": [1, 64, 768], "linear_40": [1, 64, 2304], "q_20": [1, 64, 768], "k_20": [1, 64, 768], "v_20": [1, 64, 768], "view_40": [1, 64, 12, 64], "k_21": [1, 12, 64, 64], "view_41": [1, 64, 12, 64], "q_21": [1, 12, 64, 64], "view_42": [1, 64, 12, 64], "v_21": [1, 12, 64, 64], "y_30": [1, 12, 64, 64], "transpose_43": [1, 64, 12, 64], "contiguous_10": [1, 64, 12, 64], "y_31": [1, 64, 768], "linear_41": [1, 64, 768], "y_32": [1, 64, 768], "x_61": [1, 64, 768], "layer_norm_21": [1, 64, 768], "x_62": [1, 64, 3072], "mul_40": [1, 64, 3072], "pow_11": [1, 64, 3072], "mul_41": [1, 64, 3072], "add_42": [1, 64, 3072], "mul_42": [1, 64, 3072], "tanh_10": [1, 64, 3072], "add_43": [1, 64, 3072], "x_63": [1, 64, 3072], "x_64": [1, 64, 768], "x_65": [1, 64, 768], "x_66": [1, 64, 768], "layer_norm_22": [1, 64, 768], "linear_44": [1, 64, 2304], "q_22": [1, 64, 768], "k_22": [1, 64, 768], "v_22": [1, 64, 768], "view_44": [1, 64, 12, 64], "k_23": [1, 12, 64, 64], "view_45": [1, 64, 12, 64], "q_23": [1, 12, 64, 64], "view_46": [1, 64, 12, 64], "v_23": [1, 12, 64, 64], "y_33": [1, 12, 64, 64], "transpose_47": [1, 64, 12, 64], "contiguous_11": [1, 64, 12, 64], "y_34": [1, 64, 768], "linear_45": [1, 64, 768], "y_35": [1, 64, 768], "x_67": [1, 64, 768], "layer_norm_23": [1, 64, 768], "x_68": [1, 64, 3072], "mul_44": [1, 64, 3072], "pow_12": [1, 64, 3072], "mul_45": [1, 64, 3072], "add_46": [1, 64, 3072], "mul_46": [1, 64, 3072], "tanh_11": [1, 64, 3072], "add_47": [1, 64, 3072], "x_69": [1, 64, 3072], "x_70": [1, 64, 768], "x_71": [1, 64, 768], "x_72": [1, 64, 768], "x_73": [1, 64, 768], "getitem_36": [1, 1, 768], "logits": [1, 1, 50304]}}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "e1533188987bb53e01424902ec0e389a"} + class GraphModule(torch.nn.Module): + def forward(self, L_cloned_inputs_0_: "i64[1, 64][64, 1]cuda:0", L_mod_modules_transformer_modules_wte_parameters_weight_: "f32[50304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_wpe_parameters_weight_: "f32[1024, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_: 
"f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_: "f32[2304, 768][768, 1]cuda:0", 
L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_: "f32[2304][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_: "f32[768, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_: "f32[3072, 768][768, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_: "f32[3072][1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_: "f32[768, 3072][3072, 1]cuda:0", L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_ln_f_parameters_weight_: "f32[768][1]cuda:0", L_mod_modules_transformer_modules_ln_f_parameters_bias_: "f32[768][1]cuda:0"): + l_cloned_inputs_0_ = L_cloned_inputs_0_ + l_mod_modules_transformer_modules_wte_parameters_weight_ = L_mod_modules_transformer_modules_wte_parameters_weight_ + l_mod_modules_transformer_modules_wpe_parameters_weight_ = L_mod_modules_transformer_modules_wpe_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ = 
L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ + 
l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ + l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ = L_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ + l_mod_modules_transformer_modules_ln_f_parameters_weight_ = L_mod_modules_transformer_modules_ln_f_parameters_weight_ + l_mod_modules_transformer_modules_ln_f_parameters_bias_ = L_mod_modules_transformer_modules_ln_f_parameters_bias_ + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + arange: "i64[64][1]cuda:0" = torch.arange(0, 64, dtype = torch.int64, device = device(type='cuda', index=0)) + pos: "i64[1, 64][64, 1]cuda:0" = arange.unsqueeze(0); arange = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + tok_emb: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.embedding(l_cloned_inputs_0_, l_mod_modules_transformer_modules_wte_parameters_weight_, None, None, 2.0, False, False); l_cloned_inputs_0_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + pos_emb: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.embedding(pos, 
l_mod_modules_transformer_modules_wpe_parameters_weight_, None, None, 2.0, False, False); pos = l_mod_modules_transformer_modules_wpe_parameters_weight_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = tok_emb + pos_emb; tok_emb = pos_emb = None + x: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(add, 0.0, True, False); add = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x, (768,), l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_ln_1_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + linear: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_); layer_norm = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_attn_parameters_bias_ = None + split = linear.split(768, dim = 2); linear = None + q: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + k: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + v: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k.view(1, 64, 12, 64); k = None + k_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view.transpose(1, 2); view = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_1: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q.view(1, 64, 12, 64); q = None + q_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_1.transpose(1, 2); view_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v.view(1, 64, 12, 64); v = None + v_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_2.transpose(1, 2); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_1, k_1, v_1, attn_mask = None, dropout_p = 0.0, is_causal = True); q_1 = k_1 = v_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + 
transpose_3: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y.transpose(1, 2); y = None + contiguous: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_3.contiguous(); transpose_3 = None + y_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous.view(1, 64, 768); contiguous = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_1, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_); y_1 = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_attn_modules_c_proj_parameters_bias_ = None + y_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_1, 0.0, True, False); linear_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x + y_2; x = y_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_1, (768,), l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_1, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_1 = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_2 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_2, 3.0) + mul_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_1; pow_1 = None + add_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_2 + mul_1; x_2 = mul_1 = None + mul_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_2; add_2 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_2); mul_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh; tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + x_3: "f32[1, 64, 3072][196608, 3072, 
1]cuda:0" = mul * add_3; mul = add_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + x_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_3, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_); x_3 = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_0_modules_mlp_modules_c_proj_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + x_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_4, 0.0, True, False); x_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + x_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_1 + x_5; x_1 = x_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_6, (768,), l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_ln_1_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + linear_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_2, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_); layer_norm_2 = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_attn_parameters_bias_ = None + split_1 = linear_4.split(768, dim = 2); linear_4 = None + q_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + k_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + v_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_2.view(1, 64, 12, 64); k_2 = None + k_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_4.transpose(1, 2); view_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_5: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_2.view(1, 64, 12, 64); q_2 = None + q_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_5.transpose(1, 2); view_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_6: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_2.view(1, 64, 12, 64); v_2 = 
None + v_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_6.transpose(1, 2); view_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y_3: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_3, k_3, v_3, attn_mask = None, dropout_p = 0.0, is_causal = True); q_3 = k_3 = v_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + transpose_7: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_3.transpose(1, 2); y_3 = None + contiguous_1: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_7.contiguous(); transpose_7 = None + y_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_1.view(1, 64, 768); contiguous_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_4, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_); y_4 = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_attn_modules_c_proj_parameters_bias_ = None + y_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_5, 0.0, True, False); linear_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_6 + y_5; x_6 = y_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_7, (768,), l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_3, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_3 = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_8 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_8, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_2; 
pow_2 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_8 + mul_5; x_8 = mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_6; add_6 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_6); mul_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_1; tanh_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_4 * add_7; mul_4 = add_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_9, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_); x_9 = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_1_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_10, 0.0, True, False); x_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_7 + x_11; x_7 = x_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_12, (768,), l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_4, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_); layer_norm_4 = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_2 = linear_8.split(768, dim = 2); linear_8 = None
+ q_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ k_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ v_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_8: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_4.view(1, 64, 12, 64); k_4 = None
+ k_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_8.transpose(1, 2); view_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_9: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_4.view(1, 64, 12, 64); q_4 = None
+ q_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_9.transpose(1, 2); view_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_10: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_4.view(1, 64, 12, 64); v_4 = None
+ v_5: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_10.transpose(1, 2); view_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_6: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_5, k_5, v_5, attn_mask = None, dropout_p = 0.0, is_causal = True); q_5 = k_5 = v_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_11: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_6.transpose(1, 2); y_6 = None
+ contiguous_2: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_11.contiguous(); transpose_11 = None
+ y_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_2.view(1, 64, 768); contiguous_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_7, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_); y_7 = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_9, 0.0, True, False); linear_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_12 + y_8; x_12 = y_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_13, (768,), l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_5, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_5 = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_14
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_14, 3.0)
+ mul_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_3; pow_3 = None
+ add_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_14 + mul_9; x_14 = mul_9 = None
+ mul_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_10; add_10 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_10); mul_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_2; tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_8 * add_11; mul_8 = add_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_15, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_); x_15 = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_2_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_16, 0.0, True, False); x_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_13 + x_17; x_13 = x_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_18, (768,), l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_12: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_6, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_); layer_norm_6 = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_3 = linear_12.split(768, dim = 2); linear_12 = None
+ q_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ k_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ v_6: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_12: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_6.view(1, 64, 12, 64); k_6 = None
+ k_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_12.transpose(1, 2); view_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_13: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_6.view(1, 64, 12, 64); q_6 = None
+ q_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_13.transpose(1, 2); view_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_6.view(1, 64, 12, 64); v_6 = None
+ v_7: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_14.transpose(1, 2); view_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_9: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_7, k_7, v_7, attn_mask = None, dropout_p = 0.0, is_causal = True); q_7 = k_7 = v_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_15: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_9.transpose(1, 2); y_9 = None
+ contiguous_3: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_15.contiguous(); transpose_15 = None
+ y_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_3.view(1, 64, 768); contiguous_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_10, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_); y_10 = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_13, 0.0, True, False); linear_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_18 + y_11; x_18 = y_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_19, (768,), l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_7, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_7 = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_20
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_20, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_4; pow_4 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_20 + mul_13; x_20 = mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_14; add_14 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_14); mul_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_3; tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_12 * add_15; mul_12 = add_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_21, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_); x_21 = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_3_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_22, 0.0, True, False); x_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_19 + x_23; x_19 = x_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_24, (768,), l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_16: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_8, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_); layer_norm_8 = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_4 = linear_16.split(768, dim = 2); linear_16 = None
+ q_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0]
+ k_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1]
+ v_8: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_8.view(1, 64, 12, 64); k_8 = None
+ k_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_16.transpose(1, 2); view_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_17: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_8.view(1, 64, 12, 64); q_8 = None
+ q_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_17.transpose(1, 2); view_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_18: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_8.view(1, 64, 12, 64); v_8 = None
+ v_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_18.transpose(1, 2); view_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_12: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_9, k_9, v_9, attn_mask = None, dropout_p = 0.0, is_causal = True); q_9 = k_9 = v_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_19: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_12.transpose(1, 2); y_12 = None
+ contiguous_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_19.contiguous(); transpose_19 = None
+ y_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_4.view(1, 64, 768); contiguous_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_13, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_); y_13 = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_17, 0.0, True, False); linear_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_24 + y_14; x_24 = y_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_25, (768,), l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_9, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_9 = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_26
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_26, 3.0)
+ mul_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_5; pow_5 = None
+ add_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_26 + mul_17; x_26 = mul_17 = None
+ mul_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_18; add_18 = None
+ tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_18); mul_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_4; tanh_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_16 * add_19; mul_16 = add_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_28: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_27, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_); x_27 = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_4_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_28, 0.0, True, False); x_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_25 + x_29; x_25 = x_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_30, (768,), l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_20: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_10, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_); layer_norm_10 = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_5 = linear_20.split(768, dim = 2); linear_20 = None
+ q_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0]
+ k_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1]
+ v_10: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_20: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_10.view(1, 64, 12, 64); k_10 = None
+ k_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_20.transpose(1, 2); view_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_21: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_10.view(1, 64, 12, 64); q_10 = None
+ q_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_21.transpose(1, 2); view_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_22: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_10.view(1, 64, 12, 64); v_10 = None
+ v_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_22.transpose(1, 2); view_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_15: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_11, k_11, v_11, attn_mask = None, dropout_p = 0.0, is_causal = True); q_11 = k_11 = v_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_23: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_15.transpose(1, 2); y_15 = None
+ contiguous_5: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_23.contiguous(); transpose_23 = None
+ y_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_5.view(1, 64, 768); contiguous_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_16, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_); y_16 = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_21, 0.0, True, False); linear_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_30 + y_17; x_30 = y_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_31, (768,), l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_32: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_11, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_11 = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_32
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_32, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_6; pow_6 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_32 + mul_21; x_32 = mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_22; add_22 = None
+ tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_5; tanh_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_20 * add_23; mul_20 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_33, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_); x_33 = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_5_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_34, 0.0, True, False); x_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_36: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_31 + x_35; x_31 = x_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_36, (768,), l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_24: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_12, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_); layer_norm_12 = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_6 = linear_24.split(768, dim = 2); linear_24 = None
+ q_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0]
+ k_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1]
+ v_12: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_24: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_12.view(1, 64, 12, 64); k_12 = None
+ k_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_24.transpose(1, 2); view_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_25: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_12.view(1, 64, 12, 64); q_12 = None
+ q_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_25.transpose(1, 2); view_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_12.view(1, 64, 12, 64); v_12 = None
+ v_13: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_26.transpose(1, 2); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_18: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_13, k_13, v_13, attn_mask = None, dropout_p = 0.0, is_causal = True); q_13 = k_13 = v_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_27: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_18.transpose(1, 2); y_18 = None
+ contiguous_6: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_27.contiguous(); transpose_27 = None
+ y_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_6.view(1, 64, 768); contiguous_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_19, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_); y_19 = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_25, 0.0, True, False); linear_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_36 + y_20; x_36 = y_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_37, (768,), l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_13, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_13 = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_38
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_38, 3.0)
+ mul_25: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_7; pow_7 = None
+ add_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_38 + mul_25; x_38 = mul_25 = None
+ mul_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_26; add_26 = None
+ tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_26); mul_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_6; tanh_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_24 * add_27; mul_24 = add_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_39, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_); x_39 = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_6_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_40, 0.0, True, False); x_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_37 + x_41; x_37 = x_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_42, (768,), l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_28: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_14, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_); layer_norm_14 = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_7 = linear_28.split(768, dim = 2); linear_28 = None
+ q_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ k_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ v_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_14.view(1, 64, 12, 64); k_14 = None
+ k_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_28.transpose(1, 2); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_29: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_14.view(1, 64, 12, 64); q_14 = None
+ q_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_29.transpose(1, 2); view_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_30: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_14.view(1, 64, 12, 64); v_14 = None
+ v_15: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_30.transpose(1, 2); view_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_21: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_15, k_15, v_15, attn_mask = None, dropout_p = 0.0, is_causal = True); q_15 = k_15 = v_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_31: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_21.transpose(1, 2); y_21 = None
+ contiguous_7: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_31.contiguous(); transpose_31 = None
+ y_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_7.view(1, 64, 768); contiguous_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_22, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_); y_22 = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_29, 0.0, True, False); linear_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_42 + y_23; x_42 = y_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_43, (768,), l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_15, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_15 = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_44
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_44, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_8; pow_8 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_44 + mul_29; x_44 = mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_30; add_30 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_7; tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_28 * add_31; mul_28 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_46: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_45, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_); x_45 = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_7_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_46, 0.0, True, False); x_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_43 + x_47; x_43 = x_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_48, (768,), l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_32: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_16, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_); layer_norm_16 = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_8 = linear_32.split(768, dim = 2); linear_32 = None
+ q_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ k_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ v_16: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_32: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_16.view(1, 64, 12, 64); k_16 = None
+ k_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_32.transpose(1, 2); view_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_33: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_16.view(1, 64, 12, 64); q_16 = None
+ q_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_33.transpose(1, 2); view_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_34: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_16.view(1, 64, 12, 64); v_16 = None
+ v_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_34.transpose(1, 2); view_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_24: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_17, k_17, v_17, attn_mask = None, dropout_p = 0.0, is_causal = True); q_17 = k_17 = v_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_35: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_24.transpose(1, 2); y_24 = None
+ contiguous_8: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_35.contiguous(); transpose_35 = None
+ y_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_8.view(1, 64, 768); contiguous_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_25, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_); y_25 = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_33, 0.0, True, False); linear_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_48 + y_26; x_48 = y_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_49, (768,), l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_50: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_17, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_17 = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_32: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_50
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_50, 3.0)
+ mul_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_9; pow_9 = None
+ add_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_50 + mul_33; x_50 = mul_33 = None
+ mul_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_34; add_34 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_34); mul_34 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_8; tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_51: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_32 * add_35; mul_32 = add_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_51, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_); x_51 = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_8_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_52, 0.0, True, False); x_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_49 + x_53; x_49 = x_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_54, (768,), l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_36: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_18, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_); layer_norm_18 = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_9 = linear_36.split(768, dim = 2); linear_36 = None
+ q_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ k_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ v_18: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_36: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_18.view(1, 64, 12, 64); k_18 = None
+ k_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_36.transpose(1, 2); view_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_37: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_18.view(1, 64, 12, 64); q_18 = None
+ q_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_37.transpose(1, 2); view_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_18.view(1, 64, 12, 64); v_18 = None
+ v_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_38.transpose(1, 2); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_19, k_19, v_19, attn_mask = None, dropout_p = 0.0, is_causal = True); q_19 = k_19 = v_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_39: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_27.transpose(1, 2); y_27 = None
+ contiguous_9: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_39.contiguous(); transpose_39 = None
+ y_28: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_9.view(1, 64, 768); contiguous_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_28, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_); y_28 = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_37, 0.0, True, False); linear_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_54 + y_29; x_54 = y_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_55, (768,), l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_19, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_19 = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_56
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_56, 3.0)
+ mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_10; pow_10 = None
+ add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_56 + mul_37; x_56 = mul_37 = None
+ mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_38; add_38 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_38); mul_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_9; tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_36 * add_39; mul_36 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_57, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_); x_57 = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_9_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_58, 0.0, True, False); x_58 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_60: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_55 + x_59; x_55 = x_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_60, (768,), l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_40: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_20, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_); layer_norm_20 = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_10 = linear_40.split(768, dim = 2); linear_40 = None
+ q_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ k_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ v_20: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_20.view(1, 64, 12, 64); k_20 = None
+ k_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_40.transpose(1, 2); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_41: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = q_20.view(1, 64, 12, 64); q_20 = None
+ q_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_41.transpose(1, 2); view_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_42: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_20.view(1, 64, 12, 64); v_20 = None
+ v_21: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_42.transpose(1, 2); view_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ y_30: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_21, k_21, v_21, attn_mask = None, dropout_p = 0.0, is_causal = True); q_21 = k_21 = v_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ transpose_43: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_30.transpose(1, 2); y_30 = None
+ contiguous_10: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_43.contiguous(); transpose_43 = None
+ y_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_10.view(1, 64, 768); contiguous_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ linear_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_31, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_); y_31 = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_attn_modules_c_proj_parameters_bias_ = None
+ y_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_41, 0.0, True, False); linear_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ x_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_60 + y_32; x_60 = y_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_61, (768,), l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_ln_2_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ x_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_21, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_21 = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_fc_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_40: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_62
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_62, 3.0)
+ mul_41: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_11; pow_11 = None
+ add_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_62 + mul_41; x_62 = mul_41 = None
+ mul_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_42; add_42 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_42); mul_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_43: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_10; tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ x_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_40 * add_43; mul_40 = add_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ x_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_63, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_); x_63 = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_10_modules_mlp_modules_c_proj_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ x_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_64, 0.0, True, False); x_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ x_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_61 + x_65; x_61 = x_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ layer_norm_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_66, (768,), l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_ln_1_parameters_bias_ = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ linear_44: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch._C._nn.linear(layer_norm_22, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_); layer_norm_22 = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_attn_parameters_bias_ = None
+ split_11 = linear_44.split(768, dim = 2); linear_44 = None
+ q_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ k_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ v_22: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_44: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = k_22.view(1, 64, 12, 64); k_22 = None
+ k_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_44.transpose(1, 2); view_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_45: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = 
q_22.view(1, 64, 12, 64); q_22 = None + q_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_45.transpose(1, 2); view_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_46: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = v_22.view(1, 64, 12, 64); v_22 = None + v_23: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = view_46.transpose(1, 2); view_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + y_33: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch._C._nn.scaled_dot_product_attention(q_23, k_23, v_23, attn_mask = None, dropout_p = 0.0, is_causal = True); q_23 = k_23 = v_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + transpose_47: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = y_33.transpose(1, 2); y_33 = None + contiguous_11: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = transpose_47.contiguous(); transpose_47 = None + y_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = contiguous_11.view(1, 64, 768); contiguous_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + linear_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(y_34, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_); y_34 = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_attn_modules_c_proj_parameters_bias_ = None + y_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(linear_45, 0.0, True, False); linear_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + x_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_66 + y_35; x_66 = y_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + layer_norm_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_67, (768,), l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_, 1e-05); l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_ln_2_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + x_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch._C._nn.linear(layer_norm_23, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_); layer_norm_23 = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_fc_parameters_bias_ = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.5 * x_68 + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.pow(x_68, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.044715 * pow_12; pow_12 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = x_68 + mul_45; x_68 = mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 0.7978845608028654 * add_46; add_46 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.tanh(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 1.0 + tanh_11; tanh_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + x_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = mul_44 * add_47; mul_44 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + x_70: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch._C._nn.linear(x_69, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_, l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_); x_69 = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_weight_ = l_mod_modules_transformer_modules_h_modules_11_modules_mlp_modules_c_proj_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + x_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.dropout(x_70, 0.0, True, False); x_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + x_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = x_67 + x_71; x_67 = x_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + x_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.nn.functional.layer_norm(x_72, (768,), l_mod_modules_transformer_modules_ln_f_parameters_weight_, l_mod_modules_transformer_modules_ln_f_parameters_bias_, 1e-05); x_72 = l_mod_modules_transformer_modules_ln_f_parameters_weight_ = l_mod_modules_transformer_modules_ln_f_parameters_bias_ = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + getitem_36: "f32[1, 1, 768][768, 768, 1]cuda:0" = x_73[(slice(None, None, None), [-1], slice(None, None, None))]; x_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + logits: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch._C._nn.linear(getitem_36, l_mod_modules_transformer_modules_wte_parameters_weight_, None); getitem_36 = l_mod_modules_transformer_modules_wte_parameters_weight_ = None + return (logits,) + +V0806 13:55:52.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7a6c1c6e18c552462ba4cf41955a7a9a"} + { + "name": 
"OutputGraph.call_user_compiler", + "ts": 1722977752361387.8, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:52.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "25d80fbdb85f51f969591ec89c5dc032"} + { + "name": "backend_compile", + "ts": 1722977752361482.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:53.010000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ad297a89c328174de70a3ac272334587"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977753010003.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:54.633000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:345] {"aot_joint_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "cee09115c6728ba1c8bb07bd7fcda343"} + class joint_helper(torch.nn.Module): + def forward(self, primals, tangents): + primals_1: "i64[1, 64][64, 1]cuda:0"; primals_2: "f32[50304, 768][768, 1]cuda:0"; primals_3: "f32[1024, 768][768, 1]cuda:0"; primals_4: "f32[768][1]cuda:0"; primals_5: "f32[768][1]cuda:0"; primals_6: "f32[2304, 768][768, 1]cuda:0"; primals_7: "f32[2304][1]cuda:0"; primals_8: "f32[768, 768][768, 1]cuda:0"; primals_9: "f32[768][1]cuda:0"; primals_10: "f32[768][1]cuda:0"; primals_11: "f32[768][1]cuda:0"; primals_12: "f32[3072, 768][768, 1]cuda:0"; primals_13: "f32[3072][1]cuda:0"; primals_14: "f32[768, 3072][3072, 1]cuda:0"; primals_15: "f32[768][1]cuda:0"; primals_16: "f32[768][1]cuda:0"; primals_17: "f32[768][1]cuda:0"; primals_18: "f32[2304, 768][768, 1]cuda:0"; primals_19: "f32[2304][1]cuda:0"; primals_20: "f32[768, 768][768, 1]cuda:0"; primals_21: "f32[768][1]cuda:0"; primals_22: "f32[768][1]cuda:0"; primals_23: "f32[768][1]cuda:0"; primals_24: "f32[3072, 768][768, 1]cuda:0"; primals_25: "f32[3072][1]cuda:0"; primals_26: "f32[768, 3072][3072, 1]cuda:0"; primals_27: "f32[768][1]cuda:0"; primals_28: "f32[768][1]cuda:0"; primals_29: "f32[768][1]cuda:0"; primals_30: "f32[2304, 768][768, 1]cuda:0"; primals_31: "f32[2304][1]cuda:0"; primals_32: "f32[768, 768][768, 1]cuda:0"; primals_33: "f32[768][1]cuda:0"; primals_34: "f32[768][1]cuda:0"; primals_35: "f32[768][1]cuda:0"; primals_36: "f32[3072, 768][768, 1]cuda:0"; primals_37: "f32[3072][1]cuda:0"; primals_38: "f32[768, 3072][3072, 1]cuda:0"; primals_39: "f32[768][1]cuda:0"; primals_40: "f32[768][1]cuda:0"; primals_41: "f32[768][1]cuda:0"; primals_42: "f32[2304, 768][768, 1]cuda:0"; primals_43: "f32[2304][1]cuda:0"; primals_44: "f32[768, 768][768, 1]cuda:0"; primals_45: "f32[768][1]cuda:0"; primals_46: "f32[768][1]cuda:0"; primals_47: "f32[768][1]cuda:0"; primals_48: "f32[3072, 768][768, 1]cuda:0"; primals_49: "f32[3072][1]cuda:0"; primals_50: "f32[768, 3072][3072, 1]cuda:0"; primals_51: "f32[768][1]cuda:0"; primals_52: "f32[768][1]cuda:0"; primals_53: "f32[768][1]cuda:0"; primals_54: "f32[2304, 768][768, 1]cuda:0"; primals_55: "f32[2304][1]cuda:0"; primals_56: "f32[768, 768][768, 1]cuda:0"; primals_57: "f32[768][1]cuda:0"; primals_58: "f32[768][1]cuda:0"; primals_59: "f32[768][1]cuda:0"; primals_60: "f32[3072, 768][768, 1]cuda:0"; primals_61: "f32[3072][1]cuda:0"; primals_62: "f32[768, 3072][3072, 1]cuda:0"; primals_63: "f32[768][1]cuda:0"; primals_64: "f32[768][1]cuda:0"; primals_65: "f32[768][1]cuda:0"; primals_66: "f32[2304, 768][768, 1]cuda:0"; primals_67: "f32[2304][1]cuda:0"; primals_68: "f32[768, 768][768, 1]cuda:0"; primals_69: "f32[768][1]cuda:0"; primals_70: "f32[768][1]cuda:0"; primals_71: "f32[768][1]cuda:0"; primals_72: "f32[3072, 768][768, 
+
+ primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149, tangents_1, = fx_pytree.tree_flatten_spec([primals, tangents], self._in_spec)
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(
+ iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
+ unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe(
+ embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb)
+ add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None
+ clone: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(add); add = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean = torch.ops.aten.var_mean.correction(clone, [2], correction = 0, keepdim = True)
+ getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0]
+ getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None
+ add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None
+ rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None
+ sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(clone, getitem_1)
+ mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None
+ mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4); mul = None
+ add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None
+ permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None
+ addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None
+ view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None
+ split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None
+ getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0]
+ getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1]
+ getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None
+ permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None
+ permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None
+ permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)
+ getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0]
+ getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1]
+ getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2]
+ getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None
+ alias: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_5)
+ alias_1: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias); alias = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]); getitem_5 = None
+ view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None
+ permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None
+ addmm_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = None
+ view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None
+ clone_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_7); view_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(clone, clone_1); clone_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)
+ getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0]
+ getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None
+ add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None
+ rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None
+ sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10)
+ mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None
+ mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10); mul_2 = None
+ add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None
+ permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None
+ addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None
+ view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)
+ mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None
+ tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None
+ alias_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh)
+ alias_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_2); alias_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None
+ permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None
+ addmm_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None
+ view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_11); view_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, clone_2); clone_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)
+ getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0]
+ getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None
+ add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None
+ rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None
+ sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12)
+ mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None
+ mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16); mul_8 = None
+ add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None
+ permute_8: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None
+ addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None
+ view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None
+ split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None
+ getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0]
+ getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1]
+ getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None
+ permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None
+ permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None
+ permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)
+ getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0]
+ getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1]
+ getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2]
+ getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None
+ alias_4: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_16)
+ alias_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_4); alias_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]); getitem_16 = None
+ view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None
+ permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None
+ addmm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = None
+ view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None
+ clone_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_19); view_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, clone_3); clone_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)
+ getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0]
+ getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None
+ add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None
+ rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None
+ sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21)
+ mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None
+ mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22); mul_10 = None
+ add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None
+ permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None
+ addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None
+ view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None
+ alias_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_1)
+ alias_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_6); alias_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None
+ permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None
+ addmm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None
+ view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_23); view_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, clone_4); clone_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)
+ getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0]
+ getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None
+ add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None
+ rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None
+ sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23)
+ mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None
+ mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28); mul_16 = None
+ add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None
+ permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None
+ addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None
+ view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None
+ split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None
+ getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None
+ permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None
+ permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None
+ permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)
+ getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0]
+ getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1]
+ getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2]
+ getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None
+ alias_8: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_27)
+ alias_9: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_8); alias_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]); getitem_27 = None
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None
+ addmm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = None
+ view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None
+ clone_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_31); view_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, clone_5); clone_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)
+ getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0]
+ getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None
+ add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None
+ rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None
+ sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32)
+ mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None
+ mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34); mul_18 = None
+ add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None
+ permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None
+ addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+ alias_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_2)
+ alias_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_10); alias_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None
+ permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None
+ addmm_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None
+ view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_35); view_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, clone_6); clone_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)
+ getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0]
+ getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None
+ add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None
+ rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None
+ sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34)
+ mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None
+ mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40); mul_24 = None
+ add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None
+ permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None
+ addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None
+ view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None
+ split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None
+ getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None
+ permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None
+ permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None
+ permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)
+ getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0]
+ getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1]
+ getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2]
+ getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None
+ alias_12: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_38)
+ alias_13: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_12); alias_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]); getitem_38 = None
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None
+ addmm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = None
+ view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None
+ clone_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_43); view_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, clone_7); clone_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)
+ getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0]
+ getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None
+ add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None
+ rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None
+ sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43)
+ mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None
+ mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46); mul_26 = None
+ add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None
+ permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None
+ addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+ alias_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_3)
+ alias_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_14); alias_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None
+ permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None
+ addmm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None
+ view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_47); view_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, clone_8); clone_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)
+ getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0"
= var_mean_8[0] + getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None + add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45) + mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52); mul_32 = None + add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None + permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0] + getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1] + getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0] + getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1] + getitem_51: "i64[][]cuda:0" = 
_scaled_dot_product_efficient_attention_4[2] + getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + alias_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_49) + alias_17: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_16); alias_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]); getitem_49 = None + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None + permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + addmm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = None + view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None + clone_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_55); view_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, clone_9); clone_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0] + getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None + add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54) + mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58); mul_34 = None + add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None + permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + alias_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_4) + alias_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_18); alias_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None + permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + addmm_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None + view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_59); view_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, clone_10); clone_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0] + getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None + add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56) + mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, primals_64); mul_40 = None + add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None + permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0] + getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1] + getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0] + getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1] + getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2] + getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + alias_20: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_60) + alias_21: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_20); alias_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]); getitem_60 = None + 
view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + addmm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = None + view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None + clone_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_67); view_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, clone_11); clone_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0] + getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None + add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65) + mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70); mul_42 = None + add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None + permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + alias_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_5) + alias_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_22); alias_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None + permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + addmm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None + view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_71); view_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, clone_12); clone_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0] + getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None + add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67) + mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76); mul_48 = None + add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None + permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = 
torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0] + getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1] + getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0] + getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1] + getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2] + getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + alias_24: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_71) + alias_25: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_24); alias_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]); getitem_71 = None + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + addmm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = None + view_79: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None + clone_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_79); view_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, clone_13); clone_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0] + getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None + add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76) + mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82); mul_50 = None + add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None + permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + alias_26: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_6) + alias_27: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_26); alias_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None + permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None + addmm_27: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None + view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_83); view_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, clone_14); clone_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True) + getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0] + getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None + add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None + rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None + sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78) + mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None + mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88); mul_56 = None + add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None + permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None + addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None + view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None + split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None + getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0] + getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1] + getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); 
getitem_80 = None + permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None + permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None + permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True) + getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0] + getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1] + getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2] + getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None + alias_28: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_82) + alias_29: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_28); alias_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]); getitem_82 = None + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None + addmm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = None + view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None + clone_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_91); view_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, clone_15); clone_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + 
var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True) + getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0] + getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None + add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None + rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None + sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87) + mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None + mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94); mul_58 = None + add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None + permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None + addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + alias_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_7) + alias_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_30); alias_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None + permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None + addmm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None + view_95: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_95); view_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, clone_16); clone_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True) + getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0] + getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None + add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None + rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None + sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89) + mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None + mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100); mul_64 = None + add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None + permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None + addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None + view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None + split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None + getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0] + getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1] + getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None + permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None + permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in 
forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None + permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True) + getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0] + getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1] + getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2] + getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None + alias_32: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_93) + alias_33: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_32); alias_32 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]); getitem_93 = None + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None + addmm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = None + view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None + clone_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_103); view_103 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, clone_17); clone_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True) + getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0] + getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None + add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None + rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None + sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98) + mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None + mul_67: "f32[1, 
64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106); mul_66 = None + add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None + permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None + addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + alias_34: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_8) + alias_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_34); alias_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None + permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None + addmm_35: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None + view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x) + clone_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_107); view_107 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, clone_18); clone_18 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100)
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112); mul_72 = None
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+ alias_36: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_104)
+ alias_37: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_36); alias_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]); getitem_104 = None
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+ addmm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = None
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None
+ clone_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_115); view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, clone_19); clone_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109)
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118); mul_74 = None
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+ alias_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_9)
+ alias_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_38); alias_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+ addmm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None
+ view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_119); view_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, clone_20); clone_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)
+ getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0]
+ getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None
+ add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None
+ rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None
+ sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111)
+ mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None
+ mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124); mul_80 = None
+ add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None
+ permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None
+ addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None
+ view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None
+ split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None
+ getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None
+ permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None
+ permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None
+ permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)
+ getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0]
+ getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1]
+ getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2]
+ getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None
+ alias_40: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_115)
+ alias_41: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_40); alias_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]); getitem_115 = None
+ view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None
+ permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None
+ addmm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = None
+ view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None
+ clone_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_127); view_127 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, clone_21); clone_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)
+ getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0]
+ getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None
+ add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None
+ rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None
+ sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120)
+ mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None
+ mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130); mul_82 = None
+ add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None
+ permute_86: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None
+ addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None
+ view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None
+ mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+ alias_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_10)
+ alias_43: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_42); alias_42 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None
+ permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None
+ addmm_43: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None
+ view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_131); view_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, clone_22); clone_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)
+ getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0]
+ getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None
+ add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None
+ rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None
+ sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122)
+ mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None
+ mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, primals_136); mul_88 = None
+ add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None
+ permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None
+ addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None
+ view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None
+ split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None
+ getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None
+ permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None
+ permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None
+ permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)
+ getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0]
+ getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1]
+ getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2]
+ getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None
+ alias_44: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(getitem_126)
+ alias_45: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_44); alias_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]); getitem_126 = None
+ view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None
+ permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None
+ addmm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = None
+ view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None
+ clone_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_139); view_139 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, clone_23); clone_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)
+ getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0]
+ getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None
+ add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None
+ rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None
+ sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131)
+ mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None
+ mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142); mul_90 = None
+ add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None
+ permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None
+ addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None
+ view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)
+ mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None
+ mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None
+ tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None
+ alias_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(tanh_11)
+ alias_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_46); alias_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None
+ permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None
+ addmm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None
+ view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:131 in forward, code: x = self.dropout(x)
+ clone_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.clone.default(view_143); view_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, clone_24); clone_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)
+ getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0]
+ getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None
+ add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None
+ rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None
+ sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133)
+ mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None
+ mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148); mul_96 = None
+ add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ slice_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice.Tensor(add_98, 0, 0, 9223372036854775807); add_98 = None
+ _tensor_constant0 = self._tensor_constant0
+ lift_fresh_copy: "i64[1][1]cuda:0" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
+ slice_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice.Tensor(slice_1, 2, 0, 9223372036854775807); slice_1 = None
+ index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(slice_2, [None, lift_fresh_copy]); slice_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head(
+ permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None
+ view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.view.default(index, [1, 768]); index = None
+ mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96)
+ view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None
+ view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None
+ permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0])
+ mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None
+ permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None
+ permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None
+ mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None
+ view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None
+ permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ full: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put.default(full, [None, lift_fresh_copy], view_147, True); full = lift_fresh_copy = view_147 = None
+ full_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ slice_scatter: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice_scatter.default(full_1, index_put, 2, 0, 9223372036854775807); full_1 = index_put = None
+ full_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ slice_scatter_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.slice_scatter.default(full_2, slice_scatter, 0, 0, 9223372036854775807); full_2 = slice_scatter = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None
+ mul_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_25, rsqrt_24); sub_25 = None
+ mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(slice_scatter_1, primals_148); primals_148 = None
+ mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768)
+ sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)
+ mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_98); mul_99 = None
+ sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None
+ mul_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_98, sum_2); sum_2 = None
+ sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None
+ sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None
+ div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None
+ mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None
+ mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(slice_scatter_1, mul_98); mul_98 = None
+ sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None
+ sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(slice_scatter_1, [0, 1]); slice_scatter_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(mul_103, [64, 768])
+ permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None
+ mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None
+ permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0])
+ mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None
+ permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None
+ sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None
+ view_149: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None
+ permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None
+ view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None
+ mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_48: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_47); alias_47 = None
+ alias_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_48); alias_48 = None
+ mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_49, alias_49); alias_49 = None
+ sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None
+ mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None
+ mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None
+ mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715)
+ pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None
+ mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None
+ mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None
+ permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None
+ mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None
+ permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0])
+ mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None
+ permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None
+ sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None
+ view_152: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None
+ permute_108: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None
+ view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); add_91 = getitem_131 = None
+ mul_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_29, rsqrt_23); sub_29 = None
+ mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None
+ mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768)
+ sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)
+ mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, mul_114); mul_115 = None
+ sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None
+ mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_114, sum_8); sum_8 = None
+ sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None
+ sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None
+ div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None
+ mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None
+ mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_114); mul_114 = None
+ sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None
+ sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_101, [64, 768])
+ permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None
+ mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None
+ permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0])
+ mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None
+ permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None
+ sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None
+ view_155: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None
+ permute_112: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None
+ view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None
+ permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_50: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_45); alias_45 = None
+ alias_51: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_50); alias_50 = None
+ _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, alias_51, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = alias_51 = getitem_127 = getitem_128 = getitem_129 = None
+ getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0]
+ getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1]
+ getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None
+ view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None
+ view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None
+ view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None
+ view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat, [64, 2304]); cat = None
+ permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None
+ mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None
+ permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0])
+ mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None
+ permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None
+ sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None
+ view_162: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None
+ permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None
+ view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); add_88 = getitem_122 = None
+ mul_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_32, rsqrt_22); sub_32 = None
+ mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None
+ mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768)
+ sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)
+ mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_121); mul_122 = None
+ sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None
+ mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_121, sum_14); sum_14 = None
+ sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None
+ sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None
+ div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None
+ mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None
+ mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_121); mul_121 = None
+ sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None
+ sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_102, [64, 768])
+ permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None
+ mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None
+ permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0])
+ mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None
+ permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None
+ sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None
+ view_165: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None
+ permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None
+ view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None
+ mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_43); alias_43 = None
+ alias_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_52); alias_52 = None
+ mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_53, alias_53); alias_53 = None
+ sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None
+ mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None
+ mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None
+ mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715)
+ pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None
+ mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None
+ mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None
+ permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None
+ mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None
+ permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0])
+ mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None
+ permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None
+ sum_18: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None
+ view_168: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None
+ permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None
+ view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_36: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); add_83 = getitem_120 = None
+ mul_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_36, rsqrt_21); sub_36 = None
+ mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None
+ mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768)
+ sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)
+ mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_137); mul_138 = None
+ sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None
+ mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_137, sum_20); sum_20 = None
+ sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None
+ sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None
+ div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None
+ mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None
+ mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_137); mul_137 = None
+ sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None
+ sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_105, [64, 768])
+ permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None
+ mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None
+ permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0])
+ mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None
+ permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None
+ sum_23: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None
+ view_171: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None
+ permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None
+ view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None
+ permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_54: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_41); alias_41 = None
+ alias_55: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_54); alias_54 = None
+ _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, alias_55, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = alias_55 = getitem_116 = getitem_117 = getitem_118 = None
+ getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0]
+ getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1]
+ getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None
+ view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None
+ view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None
+ view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None
+ view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None
+ permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None
+ mm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None
+ permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0])
+ mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None
+ permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None
+ sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None
+ view_178: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None
+ permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None
+ view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_39: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); add_80 = getitem_111 = None
+ mul_144: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_39, rsqrt_20); sub_39 = None
+ mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None
+ mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768)
+ sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)
+ mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_144); mul_145 = None
+ sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None
+ mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_144, sum_26); sum_26 = None
+ sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None
+ sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None
+ div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None
+ mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None
+ mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_144); mul_144 = None
+ sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None
+ sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_180: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_106, [64, 768])
+ permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None
+ mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None
+ permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0])
+ mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None
+ permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None
+ sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None
+ view_181: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None
+ permute_144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None
+ view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None
+ mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ alias_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_39); alias_39 = None
+ alias_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_56); alias_56 = None
+ mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_57, alias_57); alias_57 = None
+ sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None
+ mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None
+ mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None
+ mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715)
+ pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None
+ mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None
+ mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None
+ permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None
+ mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None
+ permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0])
+ mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None
+ permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None
+ sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None
+ view_184: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None
+ permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None
+ view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ sub_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); add_75 = getitem_109 = None
+ mul_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_43, rsqrt_19); sub_43 = None
+ mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None
+ mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768)
+ sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)
+ mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_160); mul_161 = None
+ sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None
+ mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_160, sum_32); sum_32 = None
+ sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None
+ sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None
+ div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None
+ mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None
+ mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_160); mul_160 = None
+ sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None
+ sum_34: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_109, [64, 768])
+ permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None
+ mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None
+ permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0])
+ mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None
+ permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None
+ sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None
+ view_187: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None
+ permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None
+ view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None
+ permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None
+
+ # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ alias_58: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_37); alias_37 = None
+ alias_59: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_58); alias_58 = None
+ _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, alias_59, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = alias_59 = getitem_105 = getitem_106 = getitem_107 = None
+ getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0]
+ getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1]
+ getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None
+ view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None
+ view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None
+ view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None
+ view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None
+ permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None
+ mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None
+ permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0])
+ mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None
+ permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None
+ sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None
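+
+ # [annotation added in review, not part of the captured trace] The block above is
+ # the recurring attention backward pattern in this dump: _scaled_dot_product_efficient_attention_backward
+ # returns (dq, dk, dv), the permute/view pairs undo the head-split transposes from
+ # the forward pass, and cat_2 repacks them into the fused c_attn output layout.
+ # mm_25, mm_26 and sum_36 are then the standard backward of a Linear layer
+ # y = x @ W.T + b; a minimal PyTorch sketch of the same math (hypothetical names):
+ #   grad_x = grad_y @ W          # mm_25, with W un-transposed via permute_157
+ #   grad_W = grad_y.T @ x        # mm_26 (layout round-tripped by the adjacent permutes)
+ #   grad_b = grad_y.sum(dim=0)   # sum_36, flattened to [2304] just below
+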
+ view_194: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_46: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); add_72 = getitem_100 = None + mul_167: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_46, rsqrt_18); sub_46 = None + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_167); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_167, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_167); mul_167 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_110, [64, 768]) + permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + 
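+ # [annotation added in review, not part of the captured trace] The sub_46 … mul_172
+ # sequence above is the decomposed layer_norm backward. Writing N = 768 (n_embd),
+ # xhat = (x - mean) * rsqrt (mul_167) and g = grad_out * weight (mul_168), the input
+ # gradient assembled by sub_47/sub_48/mul_172 is the textbook formula:
+ #   dx = (rsqrt / N) * (N*g - g.sum(-1, keepdim=True) - xhat * (g * xhat).sum(-1, keepdim=True))
+ # and sum_39 = (grad_out * xhat).sum((0, 1)) is dweight, sum_40 = grad_out.sum((0, 1)) is dbias.
+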
view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + mul_175: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_35); alias_35 = None + alias_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_60); alias_60 = None + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_61, alias_61); alias_61 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None + permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_42, [3072]); sum_42 
= None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); add_67 = getitem_98 = None + mul_183: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_50, rsqrt_17); sub_50 = None + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_183); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_183, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_183); mul_183 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_113, [64, 768]) + permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_62: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_33); alias_33 = None + alias_63: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_62); alias_62 = None + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, alias_63, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = alias_63 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None + permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); add_64 = getitem_89 = None + mul_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_53, rsqrt_16); sub_53 = None + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_190); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None + mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_190, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_190); mul_190 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_114, [64, 768]) + permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = 
torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_64: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_31); alias_31 = None + alias_65: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_64); alias_64 = None + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_65, alias_65); alias_65 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None + permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: 
"f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); add_59 = getitem_87 = None + mul_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_57, rsqrt_15); sub_57 = None + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_206); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_206, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_206); mul_206 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_117, [64, 768]) + permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + 
permute_191: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_66: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_29); alias_29 = None + alias_67: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_66); alias_66 = None + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, alias_67, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = alias_67 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = 
self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None + permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_60: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); add_56 = getitem_78 = None + mul_213: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_60, rsqrt_14); sub_60 = None + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_213); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_213, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_213); mul_213 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + 
view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_118, [64, 768]) + permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_27); alias_27 = None + alias_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_68); alias_68 = None + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_69, alias_69); alias_69 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None + permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); add_51 = getitem_76 = None + mul_229: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_64, rsqrt_13); sub_64 = None + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_229); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_229, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_229); mul_229 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_121, [64, 768]) + permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_70: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_25); alias_25 = None + alias_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_70); alias_70 = None + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, alias_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = alias_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None + permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); add_48 = getitem_67 = None + mul_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_67, rsqrt_12); sub_67 = None + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_236); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_236, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_236); mul_236 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = 
None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_122, [64, 768]) + permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_72: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_23); alias_23 = None + alias_73: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_72); alias_72 = None + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_73, alias_73); alias_73 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None + permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); add_43 = getitem_65 = None + mul_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_71, rsqrt_11); sub_71 = None + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_252); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_252, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_252); mul_252 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, 
[0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_125, [64, 768]) + permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_74: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_21); alias_21 = None + alias_75: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_74); alias_74 = None + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, alias_75, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = alias_75 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_234, [1, 64, 
768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None + permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); add_40 = getitem_56 = None + mul_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_74, rsqrt_10); sub_74 = None + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_259); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_259, sum_86); sum_86 = None + sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_259); mul_259 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_126, [64, 768]) + permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_19); alias_19 = None + alias_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_76); alias_76 = None + mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_77, alias_77); alias_77 = None + sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None + mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None + mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None + mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715) + pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 
= None + mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None + mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None + permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None + permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0]) + mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None + permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None + sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None + view_264: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None + permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None + view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_78: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); add_35 = getitem_54 = None + mul_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_78, rsqrt_9); sub_78 = None + mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None + mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768) + sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True) + mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_275); mul_276 = None + sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None + mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_275, sum_92); sum_92 = None + sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None + sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None + div_15: "f32[1, 64, 
1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None + mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_275); mul_275 = None + sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None + sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_129, [64, 768]) + permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None + permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0]) + mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None + permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None + sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None + view_267: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None + permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None + view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None + permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_78: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_17); alias_17 = None + alias_79: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_78); alias_78 = None + _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, alias_79, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = alias_79 = getitem_50 = getitem_51 = getitem_52 = None + getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0] + getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1] + getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None + view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None + view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None + view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None + view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None + permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None + permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0]) + mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None + permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None + sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None + view_274: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None + permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None + view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); add_32 = getitem_45 = None + mul_282: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_81, rsqrt_8); sub_81 = None + mul_283: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None + mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768) + sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True) + 
mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_282); mul_283 = None + sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None + mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_282, sum_98); sum_98 = None + sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None + sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None + div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None + mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_282); mul_282 = None + sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None + sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_130, [64, 768]) + permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None + permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0]) + mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None + permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None + sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None + view_277: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None + permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None + view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None + mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_80: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_15); alias_15 = None + alias_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_80); alias_80 = None + mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_81, alias_81); alias_81 = None + sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None + mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None + mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None + mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715) + pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None + mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None + mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None + permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None + permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0]) + mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None + permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None + sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None + view_280: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None + permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None + view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); add_27 = getitem_43 = None + mul_298: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_85, rsqrt_7); sub_85 = None + mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None + mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768) + sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True) + mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_299, mul_298); mul_299 = None + sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None + mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_298, sum_104); sum_104 = None + sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None + sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None + div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None + mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_298); mul_298 = None + sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None + sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_133, [64, 768]) + permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None + permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0]) + mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None + permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None + sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None + view_283: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None + permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None + view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None + permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_13); alias_13 = None + alias_83: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_82); alias_82 = None + _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, 
permute_27, None, alias_83, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = alias_83 = getitem_39 = getitem_40 = getitem_41 = None + getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0] + getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1] + getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None + view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None + view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None + view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None + view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None + permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None + permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0]) + mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None + permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None + sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None + view_290: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None + permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None + view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_88: "f32[1, 64, 768][49152, 
768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); add_24 = getitem_34 = None + mul_305: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_88, rsqrt_6); sub_88 = None + mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None + mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768) + sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True) + mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_305); mul_306 = None + sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None + mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_305, sum_110); sum_110 = None + sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None + sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None + div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None + mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_305); mul_305 = None + sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None + sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_134, [64, 768]) + permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None + permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0]) + mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None + permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None + sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None + view_293: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None + permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None + view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None + mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_11); alias_11 = None + alias_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_84); alias_84 = None + mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_85, alias_85); alias_85 = None + sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None + mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None + mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None + mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715) + pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None + mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None + mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None + permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None + permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0]) + mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None + permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None + sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None + view_296: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None + permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None + view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_92: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.sub.Tensor(add_19, getitem_32); add_19 = getitem_32 = None + mul_321: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_92, rsqrt_5); sub_92 = None + mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None + mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768) + sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True) + mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_321); mul_322 = None + sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None + mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_321, sum_116); sum_116 = None + sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None + sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None + div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None + mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_321); mul_321 = None + sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None + sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_137, [64, 768]) + permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None + permute_290: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_298, [1, 0]) + mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None + permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None + sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None + view_299: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None + permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None + view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None + permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_86: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_9); alias_9 = None + alias_87: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_86); alias_86 = None + _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, alias_87, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = alias_87 = getitem_28 = getitem_29 = getitem_30 = None + getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0] + getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1] + getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None + view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None + view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None + view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None + view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None + permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None + permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0]) + mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None + permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None + sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None + 
view_306: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None + permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); add_16 = getitem_23 = None + mul_328: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_95, rsqrt_4); sub_95 = None + mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_328); mul_329 = None + sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_328, sum_122); sum_122 = None + sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_328); mul_328 = None + sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_138, [64, 768]) + permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None + permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = 
None + view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_88: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_7); alias_7 = None + alias_89: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_88); alias_88 = None + mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_89, alias_89); alias_89 = None + sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None + mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None + permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None + permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_126, [3072]); 
sum_126 = None + permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); add_11 = getitem_21 = None + mul_344: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_99, rsqrt_3); sub_99 = None + mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_344); mul_345 = None + sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_344, sum_128); sum_128 = None + sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None + mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_344); mul_344 = None + sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_141, [64, 768]) + permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0]) + mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None + permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_90: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_5); alias_5 = None + alias_91: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_90); alias_90 = None + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, alias_91, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = alias_91 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[1] + getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None + permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + mm_89: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None + permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); add_8 = getitem_12 = None + mul_351: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_102, rsqrt_2); sub_102 = None + mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_351); mul_352 = None + sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None + mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_351, sum_134); sum_134 = None + sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + div_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_351); mul_351 = None + sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_142, [64, 768]) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92: "f32[768, 3072][3072, 
1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None + permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + alias_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_3); alias_3 = None + alias_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.alias.default(alias_92); alias_92 = None + mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_93, alias_93); alias_93 = None + sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None + mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + 
permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None + permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); add_3 = getitem_10 = None + mul_367: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_106, rsqrt_1); sub_106 = None + mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_367); mul_368 = None + sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_367, sum_140); sum_140 = None + sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_367); mul_367 = None + sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_145, [64, 768]) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0]) + mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); 
permute_330 = view_6 = None + permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None + permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + alias_94: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_1); alias_1 = None + alias_95: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.alias.default(alias_94); alias_94 = None + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, alias_95, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = alias_95 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in 
forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None + permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + sub_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(clone, getitem_1); clone = getitem_1 = None + mul_374: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_109, rsqrt); sub_109 = None + mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul_374); mul_375 = None + sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_374, sum_146); sum_146 = None + sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul_374); mul_374 = None + sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: 
pos_emb = self.transformer.wpe( + eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + scalar_tensor: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) + where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, scalar_tensor, add_146); unsqueeze_1 = scalar_tensor = None + full_3: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_3, [unsqueeze], where, True); full_3 = unsqueeze = where = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + scalar_tensor_1: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) + where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2, scalar_tensor_1, add_146); unsqueeze_2 = scalar_tensor_1 = add_146 = None + full_4: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_4, [primals_1], where_1, True); full_4 = primals_1 = where_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return pytree.tree_unflatten([view_145, None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, 
permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4], self._out_spec) + +V0806 13:55:55.855000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:523] {"aot_forward_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "8c2323f7142c8f65a355535a234fd64e"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_2: "f32[50304, 768][768, 1]cuda:0", primals_3: "f32[1024, 768][768, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_5: "f32[768][1]cuda:0", primals_6: "f32[2304, 768][768, 1]cuda:0", primals_7: "f32[2304][1]cuda:0", primals_8: "f32[768, 768][768, 1]cuda:0", primals_9: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_11: "f32[768][1]cuda:0", primals_12: "f32[3072, 768][768, 1]cuda:0", primals_13: "f32[3072][1]cuda:0", primals_14: "f32[768, 3072][3072, 1]cuda:0", primals_15: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_17: "f32[768][1]cuda:0", primals_18: "f32[2304, 768][768, 1]cuda:0", primals_19: "f32[2304][1]cuda:0", primals_20: "f32[768, 768][768, 1]cuda:0", primals_21: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_23: "f32[768][1]cuda:0", primals_24: "f32[3072, 768][768, 1]cuda:0", primals_25: "f32[3072][1]cuda:0", primals_26: "f32[768, 3072][3072, 1]cuda:0", primals_27: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_29: "f32[768][1]cuda:0", primals_30: "f32[2304, 768][768, 1]cuda:0", primals_31: "f32[2304][1]cuda:0", primals_32: "f32[768, 768][768, 1]cuda:0", primals_33: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_35: "f32[768][1]cuda:0", primals_36: "f32[3072, 768][768, 1]cuda:0", primals_37: "f32[3072][1]cuda:0", primals_38: "f32[768, 3072][3072, 1]cuda:0", primals_39: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_41: "f32[768][1]cuda:0", primals_42: "f32[2304, 768][768, 1]cuda:0", primals_43: "f32[2304][1]cuda:0", primals_44: "f32[768, 768][768, 1]cuda:0", primals_45: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_47: "f32[768][1]cuda:0", primals_48: "f32[3072, 768][768, 1]cuda:0", primals_49: "f32[3072][1]cuda:0", primals_50: "f32[768, 3072][3072, 1]cuda:0", primals_51: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_53: "f32[768][1]cuda:0", primals_54: "f32[2304, 768][768, 1]cuda:0", primals_55: "f32[2304][1]cuda:0", primals_56: "f32[768, 768][768, 1]cuda:0", primals_57: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_59: "f32[768][1]cuda:0", primals_60: "f32[3072, 768][768, 1]cuda:0", primals_61: "f32[3072][1]cuda:0", primals_62: "f32[768, 3072][3072, 1]cuda:0", primals_63: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_65: "f32[768][1]cuda:0", primals_66: "f32[2304, 768][768, 1]cuda:0", primals_67: "f32[2304][1]cuda:0", primals_68: "f32[768, 768][768, 1]cuda:0", primals_69: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_71: "f32[768][1]cuda:0", primals_72: "f32[3072, 768][768, 1]cuda:0", primals_73: "f32[3072][1]cuda:0", primals_74: "f32[768, 3072][3072, 1]cuda:0", primals_75: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_77: "f32[768][1]cuda:0", primals_78: "f32[2304, 768][768, 1]cuda:0", primals_79: "f32[2304][1]cuda:0", primals_80: "f32[768, 
768][768, 1]cuda:0", primals_81: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_83: "f32[768][1]cuda:0", primals_84: "f32[3072, 768][768, 1]cuda:0", primals_85: "f32[3072][1]cuda:0", primals_86: "f32[768, 3072][3072, 1]cuda:0", primals_87: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_89: "f32[768][1]cuda:0", primals_90: "f32[2304, 768][768, 1]cuda:0", primals_91: "f32[2304][1]cuda:0", primals_92: "f32[768, 768][768, 1]cuda:0", primals_93: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_95: "f32[768][1]cuda:0", primals_96: "f32[3072, 768][768, 1]cuda:0", primals_97: "f32[3072][1]cuda:0", primals_98: "f32[768, 3072][3072, 1]cuda:0", primals_99: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_101: "f32[768][1]cuda:0", primals_102: "f32[2304, 768][768, 1]cuda:0", primals_103: "f32[2304][1]cuda:0", primals_104: "f32[768, 768][768, 1]cuda:0", primals_105: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_107: "f32[768][1]cuda:0", primals_108: "f32[3072, 768][768, 1]cuda:0", primals_109: "f32[3072][1]cuda:0", primals_110: "f32[768, 3072][3072, 1]cuda:0", primals_111: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_113: "f32[768][1]cuda:0", primals_114: "f32[2304, 768][768, 1]cuda:0", primals_115: "f32[2304][1]cuda:0", primals_116: "f32[768, 768][768, 1]cuda:0", primals_117: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_119: "f32[768][1]cuda:0", primals_120: "f32[3072, 768][768, 1]cuda:0", primals_121: "f32[3072][1]cuda:0", primals_122: "f32[768, 3072][3072, 1]cuda:0", primals_123: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_125: "f32[768][1]cuda:0", primals_126: "f32[2304, 768][768, 1]cuda:0", primals_127: "f32[2304][1]cuda:0", primals_128: "f32[768, 768][768, 1]cuda:0", primals_129: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_131: "f32[768][1]cuda:0", primals_132: "f32[3072, 768][768, 1]cuda:0", primals_133: "f32[3072][1]cuda:0", primals_134: "f32[768, 3072][3072, 1]cuda:0", primals_135: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_137: "f32[768][1]cuda:0", primals_138: "f32[2304, 768][768, 1]cuda:0", primals_139: "f32[2304][1]cuda:0", primals_140: "f32[768, 768][768, 1]cuda:0", primals_141: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_143: "f32[768][1]cuda:0", primals_144: "f32[3072, 768][768, 1]cuda:0", primals_145: "f32[3072][1]cuda:0", primals_146: "f32[768, 3072][3072, 1]cuda:0", primals_147: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", primals_149: "f32[768][1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0] + getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None + add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4) + add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None + permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0] + getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1] + getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2] + getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + addmm_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None + view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0] + getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None + add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None + permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.view.default(addmm_2, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None + permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + addmm_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None + view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0] + getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None + add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_12: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None + permute_8: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0] + getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1] + getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2] + getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + addmm_5: "f32[64, 768][768, 
1]cuda:0" = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None + view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0] + getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None + add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None + permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None
+ permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None
+ addmm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None
+ view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)
+ getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0]
+ getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None
+ add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None
+ rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None
+ sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None
+ mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None
+ mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28)
+ add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None
+ permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None
+ addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None
+ view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None
+ split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None
+ getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0]
+ getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1]
+ getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_26: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None
+ permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None
+ permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None
+ permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)
+ getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0]
+ getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1]
+ getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2]
+ getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None
+ addmm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None
+ view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)
+ getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0]
+ getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None
+ add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None
+ rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None
+ sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None
+ mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None
+ mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34)
+ add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None
+ permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None
+ addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None
+ permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None
+ addmm_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None
+ view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)
+ getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0]
+ getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None
+ add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None
+ rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None
+ sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None
+ mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None
+ mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40)
+ add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None
+ permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None
+ addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None
+ view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None
+ split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None
+ getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0]
+ getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1]
+ getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None
+ permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None
+ permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None
+ permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)
+ getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0]
+ getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1]
+ getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2]
+ getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None
+ addmm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None
+ view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)
+ getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0]
+ getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None
+ add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None
+ rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None
+ sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None
+ mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None
+ mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46)
+ add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None
+ permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None
+ addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None
+ permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None
+ addmm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None
+ view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)
+ getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[0]
+ getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None
+ add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None
+ rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None
+ sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None
+ mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None
+ mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52)
+ add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None
+ permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None
+ addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None
+ view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None
+ split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None
+ getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0]
+ getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1]
+ getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None
+ permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None
+ permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None
+ permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)
+ getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0]
+ getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1]
+ getitem_51: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[2]
+ getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None
+ addmm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None
+ view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)
+ getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0]
+ getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None
+ add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None
+ rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None
+ sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None
+ mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None
+ mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58)
+ add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None
+ permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None
+ addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None
+ view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)
+ mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None
+ add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None
+ mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None
+ tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None
+ permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None
+ addmm_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None
+ view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)
+ getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0]
+ getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None
+ add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None
+ rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None
+ sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None
+ mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None
+ mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, primals_64)
+ add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None
+ permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None
+ addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None
+ view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None
+ split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None
+ getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0]
+ getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1]
+ getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None
+ permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None
+ permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None
+ permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)
+ getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0]
+ getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1]
+ getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2]
+ getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])
+ view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None
+ permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None
+ addmm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None
+ view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)
+ getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0]
+ getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None
+ add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None
+ rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None
+ sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None
+ mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None
+ mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70)
+ add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None
+ permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None
+ addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None
+ view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)
+ mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None
+ add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None
+ mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None
+ tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None
+ permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None
+ addmm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None
+ view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)
+ getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0]
+ getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None
+ add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None
+ rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None
+ sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None
+ mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None
+ mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76)
+ add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None
+ permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None
+ addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None
+ view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None
+ split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None
+ getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0]
+ getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1]
+ getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None
+ permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None
+ permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None
+ permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)
+ getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0]
+ getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1]
+ getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2]
+ getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])
+ view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None
+ permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None
+ addmm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None
+ view_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)
+ getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0]
+ getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None
+ add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None
+ rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None
+ sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None
+ mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None
+ mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82)
+ add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None
+ permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None
+ addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None
+ view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)
+ mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None
+ add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None
+ mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None
+ tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None
+ permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None
+ addmm_27: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None
+ view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)
+ getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0]
+ getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None
+ add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None
+ rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None
+ sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None
+ mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None
+ mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88)
+ add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None
+ permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None
+ addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None
+ view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None
+ split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None
+ getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None
+ permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None
+ permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None
+ permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)
+ getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0]
+ getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1]
+ getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2]
+ getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None
+ permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None
+ addmm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None
+ view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)
+ getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0]
+ getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None
+ add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None
+ rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None
+ sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None
+ mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None
+ mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94)
+ add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None
+ permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None
+ addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None
+ view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None
+ mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None
+ permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None
+ addmm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None
+ view_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)
+ getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0]
+ getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None
+ add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None
+ rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None
+ sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None
+ mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None
+ mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100)
+ add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None
+ permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None
+ addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None
+ view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None
+ split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None
+ getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None
+ permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None
+ permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None
+ permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)
+ getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0]
+ getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1]
+ getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2]
+ getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None
+ permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None
+ addmm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None
+ view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)
+ getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0]
+ getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None
+ add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None
+ rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None
+ sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None
+ mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None
+ mul_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106)
+ add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None
+ permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None
+ addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None
+ view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None
+ mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None
+ permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None
+ addmm_35: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None
+ view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112)
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+ addmm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118)
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+ addmm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None
+
view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True) + getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0] + getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None + add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None + rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None + sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None + mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None + mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124) + add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None + permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None + addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None + view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None + split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None + getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0] + getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1] + getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None + permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None + permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_114, 
[1, 64, 12, 64]); getitem_114 = None + permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True) + getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0] + getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1] + getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2] + getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None + addmm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None + view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True) + getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0] + getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None + add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None + rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None + sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None + mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None + mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130) + add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None + permute_86: "f32[768, 
3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None + addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None + permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None + addmm_43: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None + view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True) + getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0] + getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None + add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None + rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None + sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None + mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None + mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, 
primals_136) + add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None + permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None + addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None + view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None + split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None + getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0] + getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1] + getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None + permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None + permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None + permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True) + getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0] + getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1] + getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2] + getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + + # 
File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None + addmm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None + view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True) + getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0] + getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None + add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None + rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None + sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None + mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None + mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142) + add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None + permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None + addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None + permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None + addmm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None + view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True) + getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0] + getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None + add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None + rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None + sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None + mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None + mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148) + add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default: "i64[1][1]cuda:0" = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None + view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.view.default(index, [1, 768]); index = None + mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96) + view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None + permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, 
code: x = self.c_proj(x) + permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); 
permute_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = 
self.c_attn(x).split(self.n_embd, dim=2) + permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_22: "f32[1, 64, 1][64, 1, 
1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, 
getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + +V0806 13:55:55.886000 4107173 torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py:529] {"aot_backward_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "06b75a7f8452a4f7c7a1286ae5e71622"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", unsqueeze: "i64[1, 64][64, 1]cuda:0", mul: "f32[1, 64, 768][49152, 768, 1]cuda:0", view: "f32[64, 768][768, 1]cuda:0", permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_7: "i64[][]cuda:0", getitem_8: "i64[][]cuda:0", mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_8: "f32[64, 768][768, 1]cuda:0", addmm_2: "f32[64, 3072][3072, 1]cuda:0", view_10: "f32[64, 3072][3072, 1]cuda:0", mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_12: "f32[64, 768][768, 1]cuda:0", permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_18: "i64[][]cuda:0", getitem_19: "i64[][]cuda:0", mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_20: "f32[64, 768][768, 1]cuda:0", addmm_6: "f32[64, 3072][3072, 1]cuda:0", view_22: "f32[64, 3072][3072, 1]cuda:0", mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_24: "f32[64, 768][768, 1]cuda:0", permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", 
permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_29: "i64[][]cuda:0", getitem_30: "i64[][]cuda:0", mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_32: "f32[64, 768][768, 1]cuda:0", addmm_10: "f32[64, 3072][3072, 1]cuda:0", view_34: "f32[64, 3072][3072, 1]cuda:0", mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_36: "f32[64, 768][768, 1]cuda:0", permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_40: "i64[][]cuda:0", getitem_41: "i64[][]cuda:0", mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_44: "f32[64, 768][768, 1]cuda:0", addmm_14: "f32[64, 3072][3072, 1]cuda:0", view_46: "f32[64, 3072][3072, 1]cuda:0", mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_48: "f32[64, 768][768, 1]cuda:0", permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_51: "i64[][]cuda:0", getitem_52: "i64[][]cuda:0", mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_56: "f32[64, 768][768, 1]cuda:0", addmm_18: "f32[64, 3072][3072, 1]cuda:0", view_58: "f32[64, 3072][3072, 1]cuda:0", mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_60: "f32[64, 768][768, 1]cuda:0", permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_62: "i64[][]cuda:0", getitem_63: "i64[][]cuda:0", mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_68: "f32[64, 768][768, 1]cuda:0", addmm_22: "f32[64, 3072][3072, 1]cuda:0", view_70: "f32[64, 3072][3072, 1]cuda:0", mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_72: "f32[64, 768][768, 1]cuda:0", permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_73: "i64[][]cuda:0", getitem_74: "i64[][]cuda:0", mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_80: "f32[64, 768][768, 1]cuda:0", addmm_26: "f32[64, 3072][3072, 1]cuda:0", view_82: "f32[64, 3072][3072, 1]cuda:0", mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_84: "f32[64, 768][768, 1]cuda:0", permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_84: "i64[][]cuda:0", getitem_85: "i64[][]cuda:0", mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_92: "f32[64, 768][768, 1]cuda:0", addmm_30: "f32[64, 3072][3072, 1]cuda:0", view_94: "f32[64, 3072][3072, 1]cuda:0", mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_96: "f32[64, 768][768, 1]cuda:0", permute_65: "f32[1, 12, 64, 64][147456, 64, 
2304, 1]cuda:0", permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_95: "i64[][]cuda:0", getitem_96: "i64[][]cuda:0", mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_104: "f32[64, 768][768, 1]cuda:0", addmm_34: "f32[64, 3072][3072, 1]cuda:0", view_106: "f32[64, 3072][3072, 1]cuda:0", mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_108: "f32[64, 768][768, 1]cuda:0", permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_106: "i64[][]cuda:0", getitem_107: "i64[][]cuda:0", mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_116: "f32[64, 768][768, 1]cuda:0", addmm_38: "f32[64, 3072][3072, 1]cuda:0", view_118: "f32[64, 3072][3072, 1]cuda:0", mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_120: "f32[64, 768][768, 1]cuda:0", permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_117: "i64[][]cuda:0", getitem_118: "i64[][]cuda:0", mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_128: "f32[64, 768][768, 1]cuda:0", addmm_42: "f32[64, 3072][3072, 1]cuda:0", view_130: "f32[64, 3072][3072, 1]cuda:0", mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_132: "f32[64, 768][768, 1]cuda:0", permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_128: "i64[][]cuda:0", getitem_129: "i64[][]cuda:0", mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_140: "f32[64, 768][768, 1]cuda:0", addmm_46: "f32[64, 3072][3072, 1]cuda:0", view_142: "f32[64, 3072][3072, 1]cuda:0", mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0", full_default: "i64[1][1]cuda:0", view_144: "f32[1, 768][768, 1]cuda:0", permute_99: "f32[50304, 768][768, 1]cuda:0", div: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_101: "f32[768, 3072][3072, 1]cuda:0", permute_105: "f32[3072, 768][768, 1]cuda:0", div_1: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_109: "f32[768, 768][768, 1]cuda:0", permute_117: "f32[2304, 768][768, 1]cuda:0", div_2: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_121: "f32[768, 3072][3072, 1]cuda:0", permute_125: "f32[3072, 768][768, 1]cuda:0", div_3: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_129: "f32[768, 768][768, 1]cuda:0", permute_137: "f32[2304, 768][768, 1]cuda:0", div_4: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_141: "f32[768, 3072][3072, 1]cuda:0", permute_145: "f32[3072, 768][768, 1]cuda:0", div_5: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_149: "f32[768, 768][768, 1]cuda:0", permute_157: "f32[2304, 768][768, 1]cuda:0", div_6: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_161: "f32[768, 3072][3072, 1]cuda:0", permute_165: "f32[3072, 768][768, 1]cuda:0", div_7: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_169: "f32[768, 768][768, 1]cuda:0", permute_177: "f32[2304, 768][768, 1]cuda:0", div_8: "f32[1, 64, 1][64, 1, 1]cuda:0", 
permute_181: "f32[768, 3072][3072, 1]cuda:0", permute_185: "f32[3072, 768][768, 1]cuda:0", div_9: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_189: "f32[768, 768][768, 1]cuda:0", permute_197: "f32[2304, 768][768, 1]cuda:0", div_10: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_201: "f32[768, 3072][3072, 1]cuda:0", permute_205: "f32[3072, 768][768, 1]cuda:0", div_11: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_209: "f32[768, 768][768, 1]cuda:0", permute_217: "f32[2304, 768][768, 1]cuda:0", div_12: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_221: "f32[768, 3072][3072, 1]cuda:0", permute_225: "f32[3072, 768][768, 1]cuda:0", div_13: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_229: "f32[768, 768][768, 1]cuda:0", permute_237: "f32[2304, 768][768, 1]cuda:0", div_14: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_241: "f32[768, 3072][3072, 1]cuda:0", permute_245: "f32[3072, 768][768, 1]cuda:0", div_15: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_249: "f32[768, 768][768, 1]cuda:0", permute_257: "f32[2304, 768][768, 1]cuda:0", div_16: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_261: "f32[768, 3072][3072, 1]cuda:0", permute_265: "f32[3072, 768][768, 1]cuda:0", div_17: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_269: "f32[768, 768][768, 1]cuda:0", permute_277: "f32[2304, 768][768, 1]cuda:0", div_18: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_281: "f32[768, 3072][3072, 1]cuda:0", permute_285: "f32[3072, 768][768, 1]cuda:0", div_19: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_289: "f32[768, 768][768, 1]cuda:0", permute_297: "f32[2304, 768][768, 1]cuda:0", div_20: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_301: "f32[768, 3072][3072, 1]cuda:0", permute_305: "f32[3072, 768][768, 1]cuda:0", div_21: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_309: "f32[768, 768][768, 1]cuda:0", permute_317: "f32[2304, 768][768, 1]cuda:0", div_22: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_321: "f32[768, 3072][3072, 1]cuda:0", permute_325: "f32[3072, 768][768, 1]cuda:0", div_23: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_329: "f32[768, 768][768, 1]cuda:0", permute_337: "f32[2304, 768][768, 1]cuda:0", div_24: "f32[1, 64, 1][64, 1, 1]cuda:0", tangents_1: "f32[1, 1, 50304][50304, 50304, 1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = 
full_default = view_147 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(mul_103, [64, 768]) + mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None + permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None 
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None + mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None + permute_108: "f32[3072, 
768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None + sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_101, [64, 768]) + mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None + permute_112: "f32[768, 768][768, 1]cuda:0" = 
torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0] + getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1] + getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat, [64, 2304]); cat = None + mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, 
view_132); permute_118 = view_132 = None + permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None + permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None + mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768) + sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True) + mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None + sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None + mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None + sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None + sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None + mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None + mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None + sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None + sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_102, [64, 768]) + mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None + permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0]) + mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None + permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None + sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None + view_165: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None + permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None + view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None + sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None + mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None + mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None + mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715) + pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None + mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None + mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None + mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None + permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0]) + mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None + permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None + sum_18: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None + view_168: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None + permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None + view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None + mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768) + sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True) + mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None + sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None + mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None + sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None + sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None + mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None + mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None + sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None + sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_105, [64, 768]) + mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None + permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, 
C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None + permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None + sum_23: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None + view_171: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None + permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None + view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None + permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None + getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0] + getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1] + getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None + view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None + view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // 
self.n_head).transpose( + permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None + view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None + view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None + mm_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None + permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0]) + mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None + permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None + sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None + view_178: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None + permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None + view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None + mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768) + sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True) + mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None + sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None + mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None + sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None + sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None + mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None + mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None + sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None + sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_180: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.view.default(add_106, [64, 768]) + mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None + permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0]) + mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None + permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None + sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None + view_181: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None + permute_144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None + view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5) + mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None + mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None + sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None + mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None + mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None + mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715) + pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None + 
mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None + mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None + mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None + permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0]) + mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None + permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None + sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None + view_184: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None + permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None + view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None + mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768) + sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True) + mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None + sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None + mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None + sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None + sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None + mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None + mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None + sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None + sum_34: "f32[768][1]cuda:0" = 
torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_109, [64, 768]) + mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None + permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None + mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None + permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None + sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None + view_187: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None + permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None + view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None + permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None + getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0] + getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1] + getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None + view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None + view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None + view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None + view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None + mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None + permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0]) + mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None + permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None + sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None + view_194: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_110, [64, 768]) + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_175: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = 
torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_113, [64, 768]) + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None + mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_114, [64, 768]) + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + 
torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = 
torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_117, [64, 768]) + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + permute_191: "f32[768, 768][1, 768]cuda:0" = 
torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = 
torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_118, [64, 768]) + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_65, 
[768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 
3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_121, [64, 768]) + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); 
permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 
64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_122, [64, 768]) + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = 
None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_125, [64, 768]) + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 
64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None + 
sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_126, [64, 768]) + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.add.Tensor(tanh_4, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None
+ sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None
+ mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None
+ mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None
+ mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715)
+ pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None
+ mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None
+ mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None
+ mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None
+ permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0])
+ mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None
+ permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None
+ sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None
+ view_264: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None
+ permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None
+ view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None
+ mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768)
+ sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)
+ mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None
+ sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None
+ mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None
+ sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None
+ sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None
+ mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None
+ mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None
+ sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None
+ sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_129, [64, 768])
+ mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None
+ permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None
+ permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None
+ sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None
+ view_267: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None
+ permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None
+ view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None
+ permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None
+ getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0]
+ getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1]
+ getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None
+ view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None
+ view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None
+ view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None
+ view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None
+ mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None
+ permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0])
+ mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None
+ permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None
+ sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None
+ view_274: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None
+ permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None
+ view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_283: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None
+ mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768)
+ sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)
+ mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None
+ sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None
+ mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None
+ sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None
+ sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None
+ mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None
+ mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None
+ sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None
+ sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_130, [64, 768])
+ mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None
+ permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0])
+ mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None
+ permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None
+ sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None
+ view_277: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None
+ permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None
+ view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5)
+ mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None
+ sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None
+ mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None
+ mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None
+ mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715)
+ pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None
+ mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None
+ mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None
+ mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None
+ permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0])
+ mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None
+ permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None
+ sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None
+ view_280: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None
+ permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None
+ view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None
+ mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768)
+ sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)
+ mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None
+ sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None
+ mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None
+ sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None
+ sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None
+ mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None
+ mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None
+ sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None
+ sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_133, [64, 768])
+ mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None
+ permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None
+ permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None
+ sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None
+ view_283: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None
+ permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None
+ view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None
+ permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None
+ getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0]
+ getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1]
+ getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None
+ view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None
+ view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None
+ view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None
+ view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None
+ mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None
+ permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0])
+ mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None
+ permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None
+ sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None
+ view_290: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None
+ permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None
+ view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None
+ mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768)
+ sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)
+ mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None
+ sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None
+ mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None
+ sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None
+ sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None
+ mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None
+ mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None
+ sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None
+ sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_134, [64, 768])
+ mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None
+ permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0])
+ mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None
+ permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None
+ sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None
+ view_293: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None
+ permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None
+ view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5)
+ mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None
+ sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None
+ mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None
+ mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None
+ mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715)
+ pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None
+ mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None
+ mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None
+ mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None
+ permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0])
+ mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None
+ permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None
+ sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None
+ view_296: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None
+ permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None
+ view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None
+ mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768)
+ sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)
+ mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None
+ sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None
+ mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None
+ sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None
+ sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None
+ mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None
+ mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None
+ sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None
+ sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_137, [64, 768])
+ mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None
+ permute_290: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_298, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None
+ permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None
+ sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None
+ view_299: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None
+ permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None
+ view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None
+ permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None
+ getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0]
+ getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1]
+ getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None
+ view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None
+ view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None
+ view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None
+ view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None
+ mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None
+ permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0])
+ mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None
+ permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None
+ sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None
+ view_306: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None
+ permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None
+ view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None
+ mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768)
+ sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)
+ mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None
+ sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None
+ mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None
+ sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None
+ sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None
+ mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None
+ mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None
+ sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None
+ sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_138, [64, 768])
+ mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None
+ permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0])
+ mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None
+ permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None
+ sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None
+ view_309: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None
+ permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None
+ view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5)
+ mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)
+ mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None
+ add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None
+ mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None
+ tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None
+ sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None
+ mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None
+ mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None
+ mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715)
+ pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None
+ mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None
+ mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None
+ mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None
+ permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0])
+ mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None
+ permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None
+ sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None
+ view_312: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None
+ permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None
+ view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None
+ mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768)
+ sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)
+ mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None
+ sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None
+ mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None
+ sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None
+ sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None
+ mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None
+ mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None
+ sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None
+ sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_141, [64, 768])
+ mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None
+ permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])
+ view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None
+ mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None
+ permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None
+ sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None
+ view_315: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None
+ permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None
+ view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None
+ permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None
+ getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0]
+ getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[1]
+ getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None
+ view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None
+ view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None
+ view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None
+ view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None
+ mm_89: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None
+ permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0])
+ mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None
+ permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None
+ sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None
+ view_322: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None
+ permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None
+ view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None
+ mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768)
+ sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)
+ mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None
+ sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None
+ mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None
+ sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None
+ sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None
+ mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None
+ mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None
+ sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None
+ sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_142, [64, 768])
+ mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None
+ permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0])
+ mm_92: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None
+ permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None
+ sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None
+ view_325: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None
+ permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None
+ view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5)
+ mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)
+ mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None
+ add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None
+ mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None
+ tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None
+ sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None
+ mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None
+ mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None
+ mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715)
+ pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None
+ mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None
+ mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None
+ mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None
+ permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0])
+ mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None
+ permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None
+ sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None
+ view_328: "f32[3072][1]cuda:0" = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None
+ permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None
+ view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None
+ mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768)
+ sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)
+ mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None
+ sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None
+ mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None
+ sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None
+ sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None
+ mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None
+ mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None
+ sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None
+ sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(add_145, [64, 768])
+ mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None
+ permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])
+ view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None
+ mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None
+ permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None
+ sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None
+ view_331: "f32[768][1]cuda:0" = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None
+ permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None
+ view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None
+ permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None
+ getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0]
+ getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1]
+ getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None
+ view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None
+ view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None
+ view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None
+ view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None
+ mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None
+ permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0])
+ mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None
+ permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None
+ sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None
+ view_338: "f32[2304][1]cuda:0" = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None
+ permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None
+ view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None
+ mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768)
+ sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)
+ mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None
+ sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None
+ mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None
+ sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None
+ sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None
+ mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None
+ mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul); mul = None
+ sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None
+ sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe(
+ eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1)
+ unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None
+ full_default_4: "f32[][]cuda:0" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None
+ full_default_5: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1)
+ unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None
+ where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2,
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None + full_default_7: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4) + +V0806 13:55:55.887000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "059856ba64cec1d0c6ac4ed56652b6b2"} + { + "name": "compile_fx.<locals>.fw_compiler_base", + "ts": 1722977755887911.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:55.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7483f59044110884390a2269fad6d0ef"} + { + "name": "compile_fx_inner", + "ts": 1722977755888329.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:55.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "83a9a08ca4aba301017b27afa68b083a"} + { + "name": "inductor_compile", + "ts": 1722977755888403.8, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:56.409000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "b567f1d921caff1709ea228157b31270"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import
torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + torch._functorch.config.unlift_effect_tokens = True + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149): + iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + embedding = torch.ops.aten.embedding.default(primals_2, primals_1) + embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem = var_mean[0] + getitem_1 = var_mean[1]; var_mean = None + add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1 = torch.ops.aten.mul.Tensor(mul, 
primals_4) + add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None + permute = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2 = split[0] + getitem_3 = split[1] + getitem_4 = split[2]; split = None + view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5 = _scaled_dot_product_efficient_attention[0] + getitem_6 = _scaled_dot_product_efficient_attention[1] + getitem_7 = _scaled_dot_product_efficient_attention[2] + getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None + view_7 = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None + add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9 = var_mean_1[0] + getitem_10 = var_mean_1[1]; var_mean_1 = None + add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None + permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]) + mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5) + pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None + add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None + permute_7 = 
torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None + view_11 = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None + add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11 = var_mean_2[0] + getitem_12 = var_mean_2[1]; var_mean_2 = None + add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None + permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13 = split_1[0] + getitem_14 = split_1[1] + getitem_15 = split_1[2]; split_1 = None + view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16 = _scaled_dot_product_efficient_attention_1[0] + getitem_17 = _scaled_dot_product_efficient_attention_1[1] + getitem_18 = _scaled_dot_product_efficient_attention_1[2] + getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None + view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None + add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20 = var_mean_3[0] + getitem_21 = var_mean_3[1]; var_mean_3 = None + add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + 
view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None + permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6 = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]) + mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5) + pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None + add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None + mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None + permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None + addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None + view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None + add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None + var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True) + getitem_22 = var_mean_4[0] + getitem_23 = var_mean_4[1]; var_mean_4 = None + add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None + rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None + sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None + mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None + mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28) + add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None + view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None + permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None + addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None + view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None + split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None + getitem_24 = split_2[0] + getitem_25 = split_2[1] + getitem_26 = split_2[2]; split_2 = None + view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None + permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None + view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None + permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None + view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None + permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None + _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True) + getitem_27 = _scaled_dot_product_efficient_attention_2[0] + getitem_28 = _scaled_dot_product_efficient_attention_2[1] + getitem_29 = _scaled_dot_product_efficient_attention_2[2] + getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None + permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None + view_30 = torch.ops.aten.view.default(view_29, [64, 768]); 
view_29 = None + permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None + addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None + view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None + add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None + var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True) + getitem_31 = var_mean_5[0] + getitem_32 = var_mean_5[1]; var_mean_5 = None + add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None + rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None + sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None + mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None + mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34) + add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None + view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None + permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None + addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None + view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]) + mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5) + pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None + mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None + add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None + mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None + view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None + permute_23 = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None + addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None + view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None + add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None + var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True) + getitem_33 = var_mean_6[0] + getitem_34 = var_mean_6[1]; var_mean_6 = None + add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None + rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None + sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None + mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None + mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40) + add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None + view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None + permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None + addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None + view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None + split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None + getitem_35 = split_3[0] + getitem_36 = split_3[1] + getitem_37 = split_3[2]; split_3 = None + view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None + permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None + view_39 = torch.ops.aten.view.default(getitem_35, [1, 
64, 12, 64]); getitem_35 = None + permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None + view_40 = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None + permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None + _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True) + getitem_38 = _scaled_dot_product_efficient_attention_3[0] + getitem_39 = _scaled_dot_product_efficient_attention_3[1] + getitem_40 = _scaled_dot_product_efficient_attention_3[2] + getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None + permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None + view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None + permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None + addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None + view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None + add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None + var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True) + getitem_42 = var_mean_7[0] + getitem_43 = var_mean_7[1]; var_mean_7 = None + add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None + rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None + sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None + mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None + mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46) + add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None + view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None + permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None + addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None + view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]) + mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5) + pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None + mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None + add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None + mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None + view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None + permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None + addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None + view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None + add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None + var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True) + getitem_44 = var_mean_8[0] + getitem_45 = var_mean_8[1]; var_mean_8 = None + add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8 = torch.ops.aten.sub.Tensor(add_32, 
getitem_45); getitem_45 = None + mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33 = torch.ops.aten.mul.Tensor(mul_32, primals_52) + add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None + permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46 = split_4[0] + getitem_47 = split_4[1] + getitem_48 = split_4[2]; split_4 = None + view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49 = _scaled_dot_product_efficient_attention_4[0] + getitem_50 = _scaled_dot_product_efficient_attention_4[1] + getitem_51 = _scaled_dot_product_efficient_attention_4[2] + getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None + view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None + permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None + view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None + add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53 = var_mean_9[0] + getitem_54 = var_mean_9[1]; var_mean_9 = None + add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None + mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58) + add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None + permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]) + mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5) + pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None + mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + 
tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None + add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None + view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None + permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None + view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None + add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55 = var_mean_10[0] + getitem_56 = var_mean_10[1]; var_mean_10 = None + add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None + mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64) + add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None + permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57 = split_5[0] + getitem_58 = split_5[1] + getitem_59 = split_5[2]; split_5 = None + view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60 = _scaled_dot_product_efficient_attention_5[0] + getitem_61 = _scaled_dot_product_efficient_attention_5[1] + getitem_62 = _scaled_dot_product_efficient_attention_5[2] + getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None + view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None + permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None + view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None + add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64 = var_mean_11[0] + getitem_65 = var_mean_11[1]; var_mean_11 = None + add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11 = 
torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None + mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70) + add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None + permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]) + mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5) + pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None + mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None + add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None + view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None + permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None + view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None + add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66 = var_mean_12[0] + getitem_67 = var_mean_12[1]; var_mean_12 = None + add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12 = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None + mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76) + add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None + permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68 = split_6[0] + getitem_69 = split_6[1] + getitem_70 = split_6[2]; split_6 = None + view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71 = _scaled_dot_product_efficient_attention_6[0] + getitem_72 = _scaled_dot_product_efficient_attention_6[1] + getitem_73 = 
_scaled_dot_product_efficient_attention_6[2] + getitem_74 = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None + view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None + permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None + view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None + add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75 = var_mean_13[0] + getitem_76 = var_mean_13[1]; var_mean_13 = None + add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None + mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82) + add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None + permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]) + mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5) + pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None + mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None + add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None + view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None + permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None + addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None + view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None + add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None + var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True) + getitem_77 = var_mean_14[0] + getitem_78 = var_mean_14[1]; var_mean_14 = None + add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None + rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None + sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None + mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None + mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88) + add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None + view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None + permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None + addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None + view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None + 
split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None + getitem_79 = split_7[0] + getitem_80 = split_7[1] + getitem_81 = split_7[2]; split_7 = None + view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None + permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None + view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None + permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None + view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None + permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None + _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True) + getitem_82 = _scaled_dot_product_efficient_attention_7[0] + getitem_83 = _scaled_dot_product_efficient_attention_7[1] + getitem_84 = _scaled_dot_product_efficient_attention_7[2] + getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None + permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None + view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None + permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None + addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None + view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None + add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None + var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True) + getitem_86 = var_mean_15[0] + getitem_87 = var_mean_15[1]; var_mean_15 = None + add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None + rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None + sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None + mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None + mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94) + add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None + view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None + permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None + addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None + view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]) + mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5) + pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None + mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None + add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None + mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None + view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None + permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None + addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None + view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None + add_64 = 
torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None + var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True) + getitem_88 = var_mean_16[0] + getitem_89 = var_mean_16[1]; var_mean_16 = None + add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None + rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None + sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None + mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None + mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100) + add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None + view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None + permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None + addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None + view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None + split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None + getitem_90 = split_8[0] + getitem_91 = split_8[1] + getitem_92 = split_8[2]; split_8 = None + view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None + permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None + view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None + permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None + view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None + permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None + _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True) + getitem_93 = _scaled_dot_product_efficient_attention_8[0] + getitem_94 = _scaled_dot_product_efficient_attention_8[1] + getitem_95 = _scaled_dot_product_efficient_attention_8[2] + getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None + permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None + view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None + permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None + addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None + view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None + add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None + var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True) + getitem_97 = var_mean_17[0] + getitem_98 = var_mean_17[1]; var_mean_17 = None + add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None + rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None + sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None + mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None + mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106) + add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None + view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None + permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None + addmm_34 = 
torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None + view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]) + mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5) + pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None + mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None + add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None + mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None + view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None + permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None + addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None + view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None + add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None + var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True) + getitem_99 = var_mean_18[0] + getitem_100 = var_mean_18[1]; var_mean_18 = None + add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None + rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None + sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None + mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None + mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112) + add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None + view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None + permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None + addmm_36 = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None + view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None + split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None + getitem_101 = split_9[0] + getitem_102 = split_9[1] + getitem_103 = split_9[2]; split_9 = None + view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None + permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None + view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None + permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None + view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None + permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None + _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True) + getitem_104 = _scaled_dot_product_efficient_attention_9[0] + getitem_105 = _scaled_dot_product_efficient_attention_9[1] + getitem_106 = _scaled_dot_product_efficient_attention_9[2] + getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None + permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None + view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None + permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 
= None + addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None + view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None + add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None + var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True) + getitem_108 = var_mean_19[0] + getitem_109 = var_mean_19[1]; var_mean_19 = None + add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None + rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None + sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None + mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None + mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118) + add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None + view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None + permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None + addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None + view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]) + mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5) + pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None + mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None + add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None + mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None + view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None + permute_79 = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None + addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None + view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None + add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None + var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True) + getitem_110 = var_mean_20[0] + getitem_111 = var_mean_20[1]; var_mean_20 = None + add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None + rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None + sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None + mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None + mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124) + add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None + view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None + permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None + addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None + view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None + split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None + getitem_112 = split_10[0] + getitem_113 = split_10[1] + getitem_114 = split_10[2]; split_10 = None + view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None + permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None + view_123 = torch.ops.aten.view.default(getitem_112, [1, 
64, 12, 64]); getitem_112 = None + permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None + view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None + permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None + _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True) + getitem_115 = _scaled_dot_product_efficient_attention_10[0] + getitem_116 = _scaled_dot_product_efficient_attention_10[1] + getitem_117 = _scaled_dot_product_efficient_attention_10[2] + getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None + permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None + view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None + permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None + addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None + view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None + add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None + var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True) + getitem_119 = var_mean_21[0] + getitem_120 = var_mean_21[1]; var_mean_21 = None + add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None + rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None + sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None + mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None + mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130) + add_85 = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None + view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None + permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None + addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None + view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]) + mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5) + pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None + mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None + add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None + mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None + view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None + permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None + addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None + view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None + add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None + var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True) + getitem_121 = var_mean_22[0] + getitem_122 = var_mean_22[1]; var_mean_22 = None + add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None + rsqrt_22 = 
torch.ops.aten.rsqrt.default(add_89); add_89 = None + sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None + mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None + mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136) + add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None + view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None + permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None + addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None + view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None + split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None + getitem_123 = split_11[0] + getitem_124 = split_11[1] + getitem_125 = split_11[2]; split_11 = None + view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None + permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None + view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None + permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None + view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None + permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None + _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True) + getitem_126 = _scaled_dot_product_efficient_attention_11[0] + getitem_127 = _scaled_dot_product_efficient_attention_11[1] + getitem_128 = _scaled_dot_product_efficient_attention_11[2] + getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None + permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None + addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None + view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None + add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None + var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True) + getitem_130 = var_mean_23[0] + getitem_131 = var_mean_23[1]; var_mean_23 = None + add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None + rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None + sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None + mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None + mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142) + add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None + view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None + permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None + addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None + view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]) + mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5) + pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93 = 
torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None + mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None + add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None + mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None + view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None + permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None + addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None + view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None + add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None + var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True) + getitem_132 = var_mean_24[0] + getitem_133 = var_mean_24[1]; var_mean_24 = None + add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None + rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None + sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None + mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None + mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148) + add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None + full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None + permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None + view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None + mm = torch.ops.aten.mm.default(view_144, permute_96) + view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None + permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None + div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None + permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None + permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None + div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None + permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None + permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None + div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None + permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None + permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None + div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None + permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None + permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None + div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None + permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None + permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None + div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None + permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None + permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None + div_6 = 
torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None + permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None + permute_165 = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None + div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None + permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None + permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None + div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None + permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None + permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None + div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None + permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None + permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None + div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None + permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None + permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None + div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None + permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None + permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None + div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None + permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None + permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None + div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None + permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None + permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None + div_14 = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None + permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None + permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None + div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None + permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None + permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None + div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None + permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None + permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None + div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None + permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None + permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None + div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None + permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None + permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None + div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None + permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None + permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None + div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None + permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None + permute_305 = 
torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None + div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None + permute_309 = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None + permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None + div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None + permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, 
permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + + def load_args(reader): + buf0 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf0, (1, 64), dtype=torch.int64, is_leaf=True) # primals_1 + buf1 = reader.storage(None, 154533888, device=device(type='cuda', index=0)) + reader.tensor(buf1, (50304, 768), is_leaf=True) # primals_2 + buf2 = reader.storage(None, 3145728, device=device(type='cuda', index=0)) + reader.tensor(buf2, (1024, 768), is_leaf=True) # primals_3 + buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf3, (768,), is_leaf=True) # primals_4 + buf4 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf4, (768,), is_leaf=True) # primals_5 + buf5 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf5, (2304, 768), is_leaf=True) # primals_6 + buf6 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf6, (2304,), is_leaf=True) # primals_7 + buf7 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf7, (768, 768), is_leaf=True) # primals_8 + buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf8, (768,), is_leaf=True) # primals_9 + buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf9, (768,), is_leaf=True) # primals_10 + buf10 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf10, (768,), is_leaf=True) # primals_11 + buf11 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf11, (3072, 768), is_leaf=True) # primals_12 + buf12 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf12, (3072,), is_leaf=True) # primals_13 + buf13 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf13, (768, 3072), is_leaf=True) # primals_14 + buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf14, (768,), is_leaf=True) # primals_15 + buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf15, (768,), is_leaf=True) # primals_16 + buf16 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf16, (768,), is_leaf=True) # primals_17 + buf17 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf17, (2304, 768), is_leaf=True) # primals_18 + buf18 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf18, (2304,), is_leaf=True) # primals_19 + buf19 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf19, (768, 768), is_leaf=True) # primals_20 + buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf20, (768,), is_leaf=True) # primals_21 + buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf21, (768,), is_leaf=True) # primals_22 + buf22 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf22, (768,), is_leaf=True) # primals_23 + buf23 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf23, (3072, 768), is_leaf=True) # primals_24 + buf24 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf24, (3072,), is_leaf=True) # 
primals_25 + buf25 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf25, (768, 3072), is_leaf=True) # primals_26 + buf26 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf26, (768,), is_leaf=True) # primals_27 + buf27 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf27, (768,), is_leaf=True) # primals_28 + buf28 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf28, (768,), is_leaf=True) # primals_29 + buf29 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf29, (2304, 768), is_leaf=True) # primals_30 + buf30 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf30, (2304,), is_leaf=True) # primals_31 + buf31 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf31, (768, 768), is_leaf=True) # primals_32 + buf32 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf32, (768,), is_leaf=True) # primals_33 + buf33 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf33, (768,), is_leaf=True) # primals_34 + buf34 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf34, (768,), is_leaf=True) # primals_35 + buf35 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf35, (3072, 768), is_leaf=True) # primals_36 + buf36 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf36, (3072,), is_leaf=True) # primals_37 + buf37 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf37, (768, 3072), is_leaf=True) # primals_38 + buf38 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf38, (768,), is_leaf=True) # primals_39 + buf39 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf39, (768,), is_leaf=True) # primals_40 + buf40 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf40, (768,), is_leaf=True) # primals_41 + buf41 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf41, (2304, 768), is_leaf=True) # primals_42 + buf42 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf42, (2304,), is_leaf=True) # primals_43 + buf43 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf43, (768, 768), is_leaf=True) # primals_44 + buf44 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf44, (768,), is_leaf=True) # primals_45 + buf45 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf45, (768,), is_leaf=True) # primals_46 + buf46 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf46, (768,), is_leaf=True) # primals_47 + buf47 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf47, (3072, 768), is_leaf=True) # primals_48 + buf48 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf48, (3072,), is_leaf=True) # primals_49 + buf49 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf49, (768, 3072), is_leaf=True) # primals_50 + buf50 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf50, (768,), is_leaf=True) # primals_51 + buf51 = reader.storage(None, 
3072, device=device(type='cuda', index=0)) + reader.tensor(buf51, (768,), is_leaf=True) # primals_52 + buf52 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf52, (768,), is_leaf=True) # primals_53 + buf53 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf53, (2304, 768), is_leaf=True) # primals_54 + buf54 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf54, (2304,), is_leaf=True) # primals_55 + buf55 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf55, (768, 768), is_leaf=True) # primals_56 + buf56 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf56, (768,), is_leaf=True) # primals_57 + buf57 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf57, (768,), is_leaf=True) # primals_58 + buf58 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf58, (768,), is_leaf=True) # primals_59 + buf59 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf59, (3072, 768), is_leaf=True) # primals_60 + buf60 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf60, (3072,), is_leaf=True) # primals_61 + buf61 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf61, (768, 3072), is_leaf=True) # primals_62 + buf62 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf62, (768,), is_leaf=True) # primals_63 + buf63 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf63, (768,), is_leaf=True) # primals_64 + buf64 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf64, (768,), is_leaf=True) # primals_65 + buf65 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf65, (2304, 768), is_leaf=True) # primals_66 + buf66 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf66, (2304,), is_leaf=True) # primals_67 + buf67 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf67, (768, 768), is_leaf=True) # primals_68 + buf68 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf68, (768,), is_leaf=True) # primals_69 + buf69 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf69, (768,), is_leaf=True) # primals_70 + buf70 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf70, (768,), is_leaf=True) # primals_71 + buf71 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf71, (3072, 768), is_leaf=True) # primals_72 + buf72 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf72, (3072,), is_leaf=True) # primals_73 + buf73 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf73, (768, 3072), is_leaf=True) # primals_74 + buf74 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf74, (768,), is_leaf=True) # primals_75 + buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf75, (768,), is_leaf=True) # primals_76 + buf76 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf76, (768,), is_leaf=True) # primals_77 + buf77 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + 
reader.tensor(buf77, (2304, 768), is_leaf=True) # primals_78 + buf78 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf78, (2304,), is_leaf=True) # primals_79 + buf79 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf79, (768, 768), is_leaf=True) # primals_80 + buf80 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf80, (768,), is_leaf=True) # primals_81 + buf81 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf81, (768,), is_leaf=True) # primals_82 + buf82 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf82, (768,), is_leaf=True) # primals_83 + buf83 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf83, (3072, 768), is_leaf=True) # primals_84 + buf84 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf84, (3072,), is_leaf=True) # primals_85 + buf85 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf85, (768, 3072), is_leaf=True) # primals_86 + buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf86, (768,), is_leaf=True) # primals_87 + buf87 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf87, (768,), is_leaf=True) # primals_88 + buf88 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf88, (768,), is_leaf=True) # primals_89 + buf89 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf89, (2304, 768), is_leaf=True) # primals_90 + buf90 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf90, (2304,), is_leaf=True) # primals_91 + buf91 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf91, (768, 768), is_leaf=True) # primals_92 + buf92 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf92, (768,), is_leaf=True) # primals_93 + buf93 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf93, (768,), is_leaf=True) # primals_94 + buf94 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf94, (768,), is_leaf=True) # primals_95 + buf95 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf95, (3072, 768), is_leaf=True) # primals_96 + buf96 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf96, (3072,), is_leaf=True) # primals_97 + buf97 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf97, (768, 3072), is_leaf=True) # primals_98 + buf98 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf98, (768,), is_leaf=True) # primals_99 + buf99 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf99, (768,), is_leaf=True) # primals_100 + buf100 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf100, (768,), is_leaf=True) # primals_101 + buf101 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf101, (2304, 768), is_leaf=True) # primals_102 + buf102 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf102, (2304,), is_leaf=True) # primals_103 + buf103 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf103, (768, 768), 
is_leaf=True) # primals_104 + buf104 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf104, (768,), is_leaf=True) # primals_105 + buf105 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf105, (768,), is_leaf=True) # primals_106 + buf106 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf106, (768,), is_leaf=True) # primals_107 + buf107 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf107, (3072, 768), is_leaf=True) # primals_108 + buf108 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf108, (3072,), is_leaf=True) # primals_109 + buf109 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf109, (768, 3072), is_leaf=True) # primals_110 + buf110 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf110, (768,), is_leaf=True) # primals_111 + buf111 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf111, (768,), is_leaf=True) # primals_112 + buf112 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf112, (768,), is_leaf=True) # primals_113 + buf113 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf113, (2304, 768), is_leaf=True) # primals_114 + buf114 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf114, (2304,), is_leaf=True) # primals_115 + buf115 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf115, (768, 768), is_leaf=True) # primals_116 + buf116 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf116, (768,), is_leaf=True) # primals_117 + buf117 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf117, (768,), is_leaf=True) # primals_118 + buf118 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf118, (768,), is_leaf=True) # primals_119 + buf119 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf119, (3072, 768), is_leaf=True) # primals_120 + buf120 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf120, (3072,), is_leaf=True) # primals_121 + buf121 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf121, (768, 3072), is_leaf=True) # primals_122 + buf122 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf122, (768,), is_leaf=True) # primals_123 + buf123 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf123, (768,), is_leaf=True) # primals_124 + buf124 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf124, (768,), is_leaf=True) # primals_125 + buf125 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf125, (2304, 768), is_leaf=True) # primals_126 + buf126 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf126, (2304,), is_leaf=True) # primals_127 + buf127 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf127, (768, 768), is_leaf=True) # primals_128 + buf128 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf128, (768,), is_leaf=True) # primals_129 + buf129 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + 
reader.tensor(buf129, (768,), is_leaf=True) # primals_130 + buf130 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf130, (768,), is_leaf=True) # primals_131 + buf131 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf131, (3072, 768), is_leaf=True) # primals_132 + buf132 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf132, (3072,), is_leaf=True) # primals_133 + buf133 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf133, (768, 3072), is_leaf=True) # primals_134 + buf134 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf134, (768,), is_leaf=True) # primals_135 + buf135 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf135, (768,), is_leaf=True) # primals_136 + buf136 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf136, (768,), is_leaf=True) # primals_137 + buf137 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf137, (2304, 768), is_leaf=True) # primals_138 + buf138 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf138, (2304,), is_leaf=True) # primals_139 + buf139 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf139, (768, 768), is_leaf=True) # primals_140 + buf140 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf140, (768,), is_leaf=True) # primals_141 + buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf141, (768,), is_leaf=True) # primals_142 + buf142 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf142, (768,), is_leaf=True) # primals_143 + buf143 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf143, (3072, 768), is_leaf=True) # primals_144 + buf144 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf144, (3072,), is_leaf=True) # primals_145 + buf145 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf145, (768, 3072), is_leaf=True) # primals_146 + buf146 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf146, (768,), is_leaf=True) # primals_147 + buf147 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf147, (768,), is_leaf=True) # primals_148 + buf148 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf148, (768,), is_leaf=True) # primals_149 + load_args._version = 0 + mod = Repro() + if __name__ == '__main__': + from torch._dynamo.repro.after_aot import run_repro + with torch.no_grad(): + run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None) + # To run it separately, do + # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None) + # mod(*args) +V0806 13:55:56.656000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "105eb069aa0795e578b78b6ad2013154"} + class GraphModule(torch.nn.Module): + def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_2: "f32[50304, 768][768, 1]cuda:0", primals_3: "f32[1024, 768][768, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_5: 
"f32[768][1]cuda:0", primals_6: "f32[2304, 768][768, 1]cuda:0", primals_7: "f32[2304][1]cuda:0", primals_8: "f32[768, 768][768, 1]cuda:0", primals_9: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_11: "f32[768][1]cuda:0", primals_12: "f32[3072, 768][768, 1]cuda:0", primals_13: "f32[3072][1]cuda:0", primals_14: "f32[768, 3072][3072, 1]cuda:0", primals_15: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_17: "f32[768][1]cuda:0", primals_18: "f32[2304, 768][768, 1]cuda:0", primals_19: "f32[2304][1]cuda:0", primals_20: "f32[768, 768][768, 1]cuda:0", primals_21: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_23: "f32[768][1]cuda:0", primals_24: "f32[3072, 768][768, 1]cuda:0", primals_25: "f32[3072][1]cuda:0", primals_26: "f32[768, 3072][3072, 1]cuda:0", primals_27: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_29: "f32[768][1]cuda:0", primals_30: "f32[2304, 768][768, 1]cuda:0", primals_31: "f32[2304][1]cuda:0", primals_32: "f32[768, 768][768, 1]cuda:0", primals_33: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_35: "f32[768][1]cuda:0", primals_36: "f32[3072, 768][768, 1]cuda:0", primals_37: "f32[3072][1]cuda:0", primals_38: "f32[768, 3072][3072, 1]cuda:0", primals_39: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_41: "f32[768][1]cuda:0", primals_42: "f32[2304, 768][768, 1]cuda:0", primals_43: "f32[2304][1]cuda:0", primals_44: "f32[768, 768][768, 1]cuda:0", primals_45: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_47: "f32[768][1]cuda:0", primals_48: "f32[3072, 768][768, 1]cuda:0", primals_49: "f32[3072][1]cuda:0", primals_50: "f32[768, 3072][3072, 1]cuda:0", primals_51: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_53: "f32[768][1]cuda:0", primals_54: "f32[2304, 768][768, 1]cuda:0", primals_55: "f32[2304][1]cuda:0", primals_56: "f32[768, 768][768, 1]cuda:0", primals_57: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_59: "f32[768][1]cuda:0", primals_60: "f32[3072, 768][768, 1]cuda:0", primals_61: "f32[3072][1]cuda:0", primals_62: "f32[768, 3072][3072, 1]cuda:0", primals_63: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_65: "f32[768][1]cuda:0", primals_66: "f32[2304, 768][768, 1]cuda:0", primals_67: "f32[2304][1]cuda:0", primals_68: "f32[768, 768][768, 1]cuda:0", primals_69: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_71: "f32[768][1]cuda:0", primals_72: "f32[3072, 768][768, 1]cuda:0", primals_73: "f32[3072][1]cuda:0", primals_74: "f32[768, 3072][3072, 1]cuda:0", primals_75: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_77: "f32[768][1]cuda:0", primals_78: "f32[2304, 768][768, 1]cuda:0", primals_79: "f32[2304][1]cuda:0", primals_80: "f32[768, 768][768, 1]cuda:0", primals_81: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_83: "f32[768][1]cuda:0", primals_84: "f32[3072, 768][768, 1]cuda:0", primals_85: "f32[3072][1]cuda:0", primals_86: "f32[768, 3072][3072, 1]cuda:0", primals_87: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_89: "f32[768][1]cuda:0", primals_90: "f32[2304, 768][768, 1]cuda:0", primals_91: "f32[2304][1]cuda:0", primals_92: "f32[768, 768][768, 1]cuda:0", primals_93: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_95: "f32[768][1]cuda:0", primals_96: "f32[3072, 768][768, 1]cuda:0", primals_97: "f32[3072][1]cuda:0", primals_98: "f32[768, 3072][3072, 1]cuda:0", primals_99: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_101: "f32[768][1]cuda:0", 
primals_102: "f32[2304, 768][768, 1]cuda:0", primals_103: "f32[2304][1]cuda:0", primals_104: "f32[768, 768][768, 1]cuda:0", primals_105: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_107: "f32[768][1]cuda:0", primals_108: "f32[3072, 768][768, 1]cuda:0", primals_109: "f32[3072][1]cuda:0", primals_110: "f32[768, 3072][3072, 1]cuda:0", primals_111: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_113: "f32[768][1]cuda:0", primals_114: "f32[2304, 768][768, 1]cuda:0", primals_115: "f32[2304][1]cuda:0", primals_116: "f32[768, 768][768, 1]cuda:0", primals_117: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_119: "f32[768][1]cuda:0", primals_120: "f32[3072, 768][768, 1]cuda:0", primals_121: "f32[3072][1]cuda:0", primals_122: "f32[768, 3072][3072, 1]cuda:0", primals_123: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_125: "f32[768][1]cuda:0", primals_126: "f32[2304, 768][768, 1]cuda:0", primals_127: "f32[2304][1]cuda:0", primals_128: "f32[768, 768][768, 1]cuda:0", primals_129: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_131: "f32[768][1]cuda:0", primals_132: "f32[3072, 768][768, 1]cuda:0", primals_133: "f32[3072][1]cuda:0", primals_134: "f32[768, 3072][3072, 1]cuda:0", primals_135: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_137: "f32[768][1]cuda:0", primals_138: "f32[2304, 768][768, 1]cuda:0", primals_139: "f32[2304][1]cuda:0", primals_140: "f32[768, 768][768, 1]cuda:0", primals_141: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_143: "f32[768][1]cuda:0", primals_144: "f32[3072, 768][768, 1]cuda:0", primals_145: "f32[3072][1]cuda:0", primals_146: "f32[768, 3072][3072, 1]cuda:0", primals_147: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", primals_149: "f32[768][1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:223 in forward, code: pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze( + iota: "i64[64][1]cuda:0" = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) + unsqueeze: "i64[1, 64][64, 1]cuda:0" = torch.ops.aten.unsqueeze.default(iota, 0); iota = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + embedding: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_2, primals_1) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + embedding_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:232 in forward, code: x = self.transformer.drop(tok_emb + pos_emb) + add: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True) + getitem: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[0] + getitem_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean[1]; var_mean = None + add_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = 
torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None + rsqrt: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_1); add_1 = None + sub: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None + mul: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None + mul_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, primals_4) + add_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_2, [64, 768]); add_2 = None + permute: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None + addmm: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None + view_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm, [1, 64, 2304]); addmm = None + split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None + getitem_2: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[0] + getitem_3: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[1] + getitem_4: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split[2]; split = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_2: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None + permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_3: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None + permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_4: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None + permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True) + getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention[0] + getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention[1] + getitem_7: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[2] + getitem_8: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_5, [64, 768]); view_5 = None + permute_5: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None + + # No stacktrace found for following nodes + mm_default_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_6, permute_5); view_6 = None + add_tensor_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_23, primals_9); mm_default_23 = primals_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_23, [1, 64, 768]); add_tensor_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True) + getitem_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[0] + getitem_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_1[1]; var_mean_1 = None + add_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None + rsqrt_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_4); add_4 = None + sub_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None + mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None + mul_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, primals_10) + add_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_5, [64, 768]); add_5 = None + permute_6: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None + addmm_2: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_2, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 
0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_7, [64, 3072]); mul_7 = None + permute_7: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None + + # No stacktrace found for following nodes + mm_default_22: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_10, permute_7) + add_tensor_22: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_22, primals_15); mm_default_22 = primals_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_22, [1, 64, 768]); add_tensor_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True) + getitem_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[0] + getitem_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_2[1]; var_mean_2 = None + add_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None + rsqrt_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_9); add_9 = None + sub_2: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None + mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None + mul_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, primals_16) + add_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_10, [64, 768]); add_10 = None + permute_8: "f32[768, 2304][1, 768]cuda:0" = 
torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None + addmm_4: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None + view_13: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_4, [1, 64, 2304]); addmm_4 = None + split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None + getitem_13: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[0] + getitem_14: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[1] + getitem_15: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_1[2]; split_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_14: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None + permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_15: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None + permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_16: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None + permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True) + getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[0] + getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_1[1] + getitem_18: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[2] + getitem_19: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_17, [64, 768]); view_17 = None + permute_13: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None + + # No stacktrace found for following nodes + mm_default_21: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_18, permute_13); view_18 = None + add_tensor_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_21, primals_21); mm_default_21 = primals_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_21, [1, 64, 768]); add_tensor_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True) + getitem_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[0] + getitem_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_3[1]; var_mean_3 = None + add_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None + rsqrt_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_12); add_12 = None + sub_3: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None + mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None + mul_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, primals_22) + add_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_13, [64, 768]); add_13 = None + permute_14: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None + addmm_6: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_6, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None 
+ + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_15, [64, 3072]); mul_15 = None + permute_15: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None + + # No stacktrace found for following nodes + mm_default_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_22, permute_15) + add_tensor_20: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_20, primals_27); mm_default_20 = primals_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_20, [1, 64, 768]); add_tensor_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True) + getitem_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[0] + getitem_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_4[1]; var_mean_4 = None + add_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None + rsqrt_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_17); add_17 = None + sub_4: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None + mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None + mul_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, primals_28) + add_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_24: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_18, [64, 768]); add_18 = None + permute_16: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None + addmm_8: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None + view_25: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_8, [1, 64, 2304]); addmm_8 = None + split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None + getitem_24: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[0] + getitem_25: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[1] + getitem_26: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_2[2]; split_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_26: 
"f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None + permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_27: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None + permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_28: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None + permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True) + getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[0] + getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_2[1] + getitem_29: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[2] + getitem_30: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_20, [1, 64, 768]); permute_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_29, [64, 768]); view_29 = None + permute_21: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None + + # No stacktrace found for following nodes + mm_default_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_30, permute_21); view_30 = None + add_tensor_19: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_19, primals_33); mm_default_19 = primals_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_19, [1, 64, 768]); add_tensor_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True) + getitem_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[0] + getitem_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_5[1]; var_mean_5 = None + add_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None + rsqrt_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_20); add_20 = None + sub_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None + mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None + mul_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, primals_34) + add_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_32: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_21, [64, 768]); add_21 = None + permute_22: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None + addmm_10: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None + view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_10, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None + mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_23, [64, 3072]); mul_23 = None + permute_23: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None + + # No stacktrace found for following nodes + mm_default_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_34, permute_23) + add_tensor_18: 
"f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_18, primals_39); mm_default_18 = primals_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_18, [1, 64, 768]); add_tensor_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True) + getitem_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[0] + getitem_34: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_6[1]; var_mean_6 = None + add_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None + rsqrt_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_25); add_25 = None + sub_6: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None + mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None + mul_25: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, primals_40) + add_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_36: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_26, [64, 768]); add_26 = None + permute_24: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None + addmm_12: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None + view_37: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_12, [1, 64, 2304]); addmm_12 = None + split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None + getitem_35: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[0] + getitem_36: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[1] + getitem_37: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_3[2]; split_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_38: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None + permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_39: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None + permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_40: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None + permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True) + getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[0] + getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_3[1] + getitem_40: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[2] + getitem_41: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_28, [1, 64, 768]); permute_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_41, [64, 768]); view_41 = None + permute_29: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None + + # No stacktrace found for following nodes + mm_default_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_42, permute_29); view_42 = None + add_tensor_17: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_17, primals_45); mm_default_17 = primals_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_17, [1, 64, 768]); add_tensor_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True) + getitem_42: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[0] + getitem_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_7[1]; var_mean_7 = None + add_28: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None + rsqrt_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_28); add_28 = None + sub_7: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None + mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None + mul_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, primals_46) + add_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_44: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_29, [64, 768]); add_29 = None + permute_30: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None + addmm_14: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None + view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_14, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None + mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_31, [64, 3072]); mul_31 = None + permute_31: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None + + # No stacktrace found for following nodes + mm_default_16: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_46, permute_31) + add_tensor_16: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_16, primals_51); mm_default_16 = primals_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_16, [1, 64, 768]); add_tensor_16 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True) + getitem_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[0] + getitem_45: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_8[1]; var_mean_8 = None + add_33: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None + rsqrt_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_33); add_33 = None + sub_8: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None + mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None + mul_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, primals_52) + add_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_48: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_34, [64, 768]); add_34 = None + permute_32: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None + addmm_16: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None + view_49: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_16, [1, 64, 2304]); addmm_16 = None + split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None + getitem_46: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[0] + getitem_47: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[1] + getitem_48: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_4[2]; split_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_50: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None + permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_51: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None + permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_52: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None + permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True) + getitem_49: "f32[1, 12, 64, 64][49152, 64, 
768, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[0] + getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_4[1] + getitem_51: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[2] + getitem_52: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_53, [64, 768]); view_53 = None + permute_37: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None + + # No stacktrace found for following nodes + mm_default_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_54, permute_37); view_54 = None + add_tensor_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_15, primals_57); mm_default_15 = primals_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_15, [1, 64, 768]); add_tensor_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True) + getitem_53: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[0] + getitem_54: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_9[1]; var_mean_9 = None + add_36: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None + rsqrt_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_36); add_36 = None + sub_9: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None + mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None + mul_35: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, primals_58) + add_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_56: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_37, [64, 768]); add_37 = None + permute_38: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None + addmm_18: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None + view_57: "f32[1, 64, 3072][196608, 3072, 
1]cuda:0" = torch.ops.aten.reshape.default(addmm_18, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_58: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_39, [64, 3072]); mul_39 = None + permute_39: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None + + # No stacktrace found for following nodes + mm_default_14: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_58, permute_39) + add_tensor_14: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_14, primals_63); mm_default_14 = primals_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_14, [1, 64, 768]); add_tensor_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True) + getitem_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[0] + getitem_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_10[1]; var_mean_10 = None + add_41: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None + rsqrt_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_41); add_41 = None + sub_10: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None + mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None + mul_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_40, primals_64) + add_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_60: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_42, [64, 768]); add_42 = None + permute_40: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None + addmm_20: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None + view_61: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_20, [1, 64, 2304]); addmm_20 = None + split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None + getitem_57: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[0] + getitem_58: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[1] + getitem_59: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_5[2]; split_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_62: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None + permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_63: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None + permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_64: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None + permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True) + getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[0] + getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_5[1] + getitem_62: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[2] + getitem_63: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_44, [1, 64, 768]); permute_44 = None + + 
# File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_65, [64, 768]); view_65 = None + permute_45: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None + + # No stacktrace found for following nodes + mm_default_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_66, permute_45); view_66 = None + add_tensor_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_13, primals_69); mm_default_13 = primals_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_13, [1, 64, 768]); add_tensor_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True) + getitem_64: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[0] + getitem_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_11[1]; var_mean_11 = None + add_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None + rsqrt_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_44); add_44 = None + sub_11: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None + mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None + mul_43: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, primals_70) + add_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_68: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_45, [64, 768]); add_45 = None + permute_46: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None + addmm_22: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_22, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, 
mul_45); view_69 = mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_70: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_47, [64, 3072]); mul_47 = None + permute_47: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None + + # No stacktrace found for following nodes + mm_default_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_70, permute_47) + add_tensor_12: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_12, primals_75); mm_default_12 = primals_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_71: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_12, [1, 64, 768]); add_tensor_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x)) + add_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True) + getitem_66: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[0] + getitem_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_12[1]; var_mean_12 = None + add_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None + rsqrt_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_49); add_49 = None + sub_12: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None + mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None + mul_49: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, primals_76) + add_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + view_72: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_50, [64, 768]); add_50 = None + permute_48: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None + addmm_24: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None + view_73: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_24, [1, 64, 
2304]); addmm_24 = None + split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None + getitem_68: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[0] + getitem_69: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[1] + getitem_70: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_6[2]; split_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + view_74: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None + permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + view_75: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None + permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + view_76: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None + permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True) + getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[0] + getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_6[1] + getitem_73: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[2] + getitem_74: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_77, [64, 768]); view_77 = None + permute_53: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None + + # No stacktrace found for following nodes + mm_default_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_78, permute_53); view_78 = None + add_tensor_11: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_11, primals_81); mm_default_11 = primals_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = 
self.resid_dropout(self.c_proj(y)) + view_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_11, [1, 64, 768]); add_tensor_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x)) + add_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True) + getitem_75: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[0] + getitem_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_13[1]; var_mean_13 = None + add_52: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None + rsqrt_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_52); add_52 = None + sub_13: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None + mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None + mul_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, primals_82) + add_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_80: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_53, [64, 768]); add_53 = None + permute_54: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None + addmm_26: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_26, [1, 64, 3072]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_82: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_55, [64, 3072]); mul_55 = None
+ permute_55: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None
+
+ # No stacktrace found for following nodes
+ mm_default_10: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_82, permute_55)
+ add_tensor_10: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_10, primals_87); mm_default_10 = primals_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_10, [1, 64, 768]); add_tensor_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)
+ getitem_77: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[0]
+ getitem_78: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_14[1]; var_mean_14 = None
+ add_57: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None
+ rsqrt_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_57); add_57 = None
+ sub_14: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None
+ mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None
+ mul_57: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, primals_88)
+ add_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_84: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_58, [64, 768]); add_58 = None
+ permute_56: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None
+ addmm_28: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None
+ view_85: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_28, [1, 64, 2304]); addmm_28 = None
+ split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None
+ getitem_79: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[0]
+ getitem_80: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[1]
+ getitem_81: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_7[2]; split_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_86: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None
+ permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_87: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None
+ permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_88: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None
+ permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)
+ getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[0]
+ getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_7[1]
+ getitem_84: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[2]
+ getitem_85: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_60, [1, 64, 768]); permute_60 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_89, [64, 768]); view_89 = None
+ permute_61: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None
+
+ # No stacktrace found for following nodes
+ mm_default_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_90, permute_61); view_90 = None
+ add_tensor_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_9, primals_93); mm_default_9 = primals_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_9, [1, 64, 768]); add_tensor_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)
+ getitem_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[0]
+ getitem_87: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_15[1]; var_mean_15 = None
+ add_60: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None
+ rsqrt_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_60); add_60 = None
+ sub_15: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None
+ mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None
+ mul_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, primals_94)
+ add_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_92: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_61, [64, 768]); add_61 = None
+ permute_62: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None
+ addmm_30: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None
+ view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_30, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None
+ mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_94: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_63, [64, 3072]); mul_63 = None
+ permute_63: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None
+
+ # No stacktrace found for following nodes
+ mm_default_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_94, permute_63)
+ add_tensor_8: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_8, primals_99); mm_default_8 = primals_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_95: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_8, [1, 64, 768]); add_tensor_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)
+ getitem_88: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[0]
+ getitem_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_16[1]; var_mean_16 = None
+ add_65: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None
+ rsqrt_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_65); add_65 = None
+ sub_16: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None
+ mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None
+ mul_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, primals_100)
+ add_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_96: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_66, [64, 768]); add_66 = None
+ permute_64: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None
+ addmm_32: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None
+ view_97: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_32, [1, 64, 2304]); addmm_32 = None
+ split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None
+ getitem_90: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[0]
+ getitem_91: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[1]
+ getitem_92: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_8[2]; split_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_98: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None
+ permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_99: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None
+ permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_100: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None
+ permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)
+ getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[0]
+ getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_8[1]
+ getitem_95: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[2]
+ getitem_96: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_68, [1, 64, 768]); permute_68 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_101, [64, 768]); view_101 = None
+ permute_69: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None
+
+ # No stacktrace found for following nodes
+ mm_default_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_102, permute_69); view_102 = None
+ add_tensor_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_7, primals_105); mm_default_7 = primals_105 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_7, [1, 64, 768]); add_tensor_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)
+ getitem_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[0]
+ getitem_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_17[1]; var_mean_17 = None
+ add_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None
+ rsqrt_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_68); add_68 = None
+ sub_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None
+ mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None
+ mul_67: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, primals_106)
+ add_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_104: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_69, [64, 768]); add_69 = None
+ permute_70: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None
+ addmm_34: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None
+ view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_34, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None
+ mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_106: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_71, [64, 3072]); mul_71 = None
+ permute_71: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None
+
+ # No stacktrace found for following nodes
+ mm_default_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_106, permute_71)
+ add_tensor_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_6, primals_111); mm_default_6 = primals_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_6, [1, 64, 768]); add_tensor_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)
+ getitem_99: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[0]
+ getitem_100: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_18[1]; var_mean_18 = None
+ add_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None
+ rsqrt_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_73); add_73 = None
+ sub_18: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None
+ mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None
+ mul_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, primals_112)
+ add_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_108: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_74, [64, 768]); add_74 = None
+ permute_72: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None
+ addmm_36: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None
+ view_109: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_36, [1, 64, 2304]); addmm_36 = None
+ split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None
+ getitem_101: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[0]
+ getitem_102: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[1]
+ getitem_103: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_9[2]; split_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_110: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None
+ permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_111: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None
+ permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_112: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None
+ permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)
+ getitem_104: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[0]
+ getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_9[1]
+ getitem_106: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[2]
+ getitem_107: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_76, [1, 64, 768]); permute_76 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_113, [64, 768]); view_113 = None
+ permute_77: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None
+
+ # No stacktrace found for following nodes
+ mm_default_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_114, permute_77); view_114 = None
+ add_tensor_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_5, primals_117); mm_default_5 = primals_117 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_5, [1, 64, 768]); add_tensor_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)
+ getitem_108: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[0]
+ getitem_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_19[1]; var_mean_19 = None
+ add_76: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None
+ rsqrt_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_76); add_76 = None
+ sub_19: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None
+ mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None
+ mul_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, primals_118)
+ add_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_116: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_77, [64, 768]); add_77 = None
+ permute_78: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None
+ addmm_38: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None
+ view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_38, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None
+ mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_118: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_79, [64, 3072]); mul_79 = None
+ permute_79: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None
+
+ # No stacktrace found for following nodes
+ mm_default_4: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_118, permute_79)
+ add_tensor_4: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_4, primals_123); mm_default_4 = primals_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_4, [1, 64, 768]); add_tensor_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)
+ getitem_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[0]
+ getitem_111: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_20[1]; var_mean_20 = None
+ add_81: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None
+ rsqrt_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_81); add_81 = None
+ sub_20: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None
+ mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None
+ mul_81: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, primals_124)
+ add_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_120: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_82, [64, 768]); add_82 = None
+ permute_80: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None
+ addmm_40: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None
+ view_121: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_40, [1, 64, 2304]); addmm_40 = None
+ split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None
+ getitem_112: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[0]
+ getitem_113: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[1]
+ getitem_114: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_10[2]; split_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_122: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None
+ permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_123: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None
+ permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_124: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None
+ permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)
+ getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[0]
+ getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_10[1]
+ getitem_117: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[2]
+ getitem_118: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])
+ view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_84, [1, 64, 768]); permute_84 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_125, [64, 768]); view_125 = None
+ permute_85: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None
+
+ # No stacktrace found for following nodes
+ mm_default_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_126, permute_85); view_126 = None
+ add_tensor_3: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_3, primals_129); mm_default_3 = primals_129 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_3, [1, 64, 768]); add_tensor_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)
+ getitem_119: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[0]
+ getitem_120: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_21[1]; var_mean_21 = None
+ add_84: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None
+ rsqrt_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_84); add_84 = None
+ sub_21: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None
+ mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None
+ mul_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, primals_130)
+ add_85: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_128: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_85, [64, 768]); add_85 = None
+ permute_86: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None
+ addmm_42: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None
+ view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_42, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None
+ mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_130: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_87, [64, 3072]); mul_87 = None
+ permute_87: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None
+
+ # No stacktrace found for following nodes
+ mm_default_2: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_130, permute_87)
+ add_tensor_2: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_2, primals_135); mm_default_2 = primals_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_131: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_2, [1, 64, 768]); add_tensor_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)
+ getitem_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[0]
+ getitem_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_22[1]; var_mean_22 = None
+ add_89: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None
+ rsqrt_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_89); add_89 = None
+ sub_22: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None
+ mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None
+ mul_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, primals_136)
+ add_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ view_132: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_90, [64, 768]); add_90 = None
+ permute_88: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None
+ addmm_44: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None
+ view_133: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_44, [1, 64, 2304]); addmm_44 = None
+ split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None
+ getitem_123: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[0]
+ getitem_124: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[1]
+ getitem_125: "f32[1, 64, 768][147456, 2304, 1]cuda:0" = split_11[2]; split_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_134: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None
+ permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_135: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None
+ permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose(
+ view_136: "f32[1, 64, 12, 64][147456, 2304, 64, 1]cuda:0" = torch.ops.aten.reshape.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None
+ permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0" = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention(
+ _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)
+ getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[0]
+ getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0" = _scaled_dot_product_efficient_attention_11[1]
+ getitem_128: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[2]
+ getitem_129: "i64[][]cuda:0" = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C)
+ permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])
+ view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_92, [1, 64, 768]); permute_92 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_137, [64, 768]); view_137 = None
+ permute_93: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None
+
+ # No stacktrace found for following nodes
+ mm_default_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_138, permute_93); view_138 = None
+ add_tensor_1: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_1, primals_141); mm_default_1 = primals_141 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ view_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor_1, [1, 64, 768]); add_tensor_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:144 in forward, code: x = x + self.attn(self.ln_1(x))
+ add_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)
+ getitem_130: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[0]
+ getitem_131: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_23[1]; var_mean_23 = None
+ add_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None
+ rsqrt_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_92); add_92 = None
+ sub_23: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None
+ mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None
+ mul_91: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, primals_142)
+ add_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ view_140: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_93, [64, 768]); add_93 = None
+ permute_94: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None
+ addmm_46: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None
+ view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_46, [1, 64, 3072])
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5)
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))
+ pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)
+ mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None
+ add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None
+ mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None
+ tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0
+ add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5
+ mul_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_142: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(mul_95, [64, 3072]); mul_95 = None
+ permute_95: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None
+
+ # No stacktrace found for following nodes
+ mm_default: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_142, permute_95)
+ add_tensor: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default, primals_147); mm_default = primals_147 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ view_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(add_tensor, [1, 64, 768]); add_tensor = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:145 in forward, code: x = x + self.mlp(self.ln_2(x))
+ add_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)
+ getitem_132: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[0]
+ getitem_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = var_mean_24[1]; var_mean_24 = None
+ add_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None
+ rsqrt_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.rsqrt.default(add_97); add_97 = None
+ sub_24: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None
+ mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None
+ mul_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, primals_148)
+ add_98: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :]
+ full_default: "i64[1][1]cuda:0" = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ index: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head(
+ permute_96: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None
+ view_144: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(index, [1, 768]); index = None
+ mm: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.mm.default(view_144, permute_96)
+ view_145: "f32[1, 1, 50304][50304, 50304, 1]cuda:0" = torch.ops.aten.reshape.default(mm, [1, 1, 50304]); mm = None
+ permute_99: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_101: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_105: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_109: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_117: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_121: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_125: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_3: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_129: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_137: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_4: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_141: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_145: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_5: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_149: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_157: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_6: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_161: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_165: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_169: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_177: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_181: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_185: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_9: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_189: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_197: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_10: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_201: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_205: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_11: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_209: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_217: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_12: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_221: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_225: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_229: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_237: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_241: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_245: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_15: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_249: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_257: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_16: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_261: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_265: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_17: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_269: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_277: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_18: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_281: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_285: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_289: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_297: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x)
+ permute_301: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x)
+ permute_305: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_21: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y))
+ permute_309: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+ permute_317: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+ div_22: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x
= self.c_proj(x) + permute_321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + permute_325: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_23: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + permute_329: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + permute_337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute, [1, 0]); permute = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + div_24: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None + return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, 
permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24) + +V0806 13:55:56.662000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f9732de63a2678cd84f99c06235155c7"} + { + "name": "GraphLowering.run", + "ts": 1722977756662900.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.358000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5c78e44dee7eae3e1fe0c6d03c87b6d6"} + { + "name": "GraphLowering.run", + "ts": 1722977757358803.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:57.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "24bd000f779650a5bca456fe7f33510d"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977757361591.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.361000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "817c28f5892846ba90a4e241b47bc9aa"} + { + "name": "code_gen", + "ts": 1722977757361702.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.367000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "268013e64077861aa06897ccb5d7ed85"} + { + "name": "Scheduler.__init__", + "ts": 1722977757367030.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:57.994000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5ea38044443328684bb4a7c1e7a9345b"} + { + "name": "Scheduler.__init__", + "ts": 1722977757994874.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:57.995000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f1875d4b22158eb9e26e63b134b00766"} + { + "name": "Scheduler.codegen", + "ts": 1722977757995265.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:58.621000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "effece776be3ac8b00bd99af0f3b9e28"} + { + "name": "Scheduler.codegen", + "ts": 1722977758621719.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:58.622000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d253d967fc7804a3c84fe17aa255d0d4"} + { + "name": "WrapperCodeGen.generate", + "ts": 1722977758622009.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:55:58.647000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fb1b1c454711c8a114bd0247e8c5bc50"} + { + "name": "WrapperCodeGen.generate", + "ts": 1722977758647481.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:55:58.648000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/6z/c6zg4h42euxwsaoxhpcfic2sgwsxsngjulbnvydlzmvlm7pswqjm.py"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "fd1ea0df8f0e030ef934fc089a8c1ff2"} + + # AOT ID: ['0_forward'] + from ctypes import c_void_p, c_long + import 
torch + import math + import random + import os + import tempfile + from math import inf, nan + from torch._inductor.hooks import run_intermediate_hooks + from torch._inductor.utils import maybe_profile + from torch._inductor.codegen.memory_planning import _align as align + + from torch import device, empty_strided + from torch._inductor.async_compile import AsyncCompile + from torch._inductor.select_algorithm import extern_kernels + from torch._inductor.codegen.multi_kernel import MultiKernelCall + + aten = torch.ops.aten + inductor_ops = torch.ops.inductor + _quantized = torch.ops._quantized + assert_size_stride = torch._C._dynamo.guards.assert_size_stride + empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu + empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda + reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor + alloc_from_pool = torch.ops.inductor._alloc_from_pool + async_compile = AsyncCompile() + + + # kernel path: /tmp/tmp2ln889l5/ud/cudpuzdqzuzntk7ujry646lmmfgeazd3ik3spi6vvf2mczwqx4bc.py + # Source Nodes: [add, arange, layer_norm, pos_emb, tok_emb], Original ATen: [aten.add, aten.arange, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + # add => add + # arange => iota + # layer_norm => add_1, add_2, mul, mul_1, rsqrt, sub, var_mean + # pos_emb => embedding_1 + # tok_emb => embedding + triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.DEFAULT, + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*i64', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: 'i32', 10: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, out_ptr0, out_ptr3, out_ptr4, out_ptr5, xnumel, rnumel, XBLOCK : tl.constexpr, RBLOCK : tl.constexpr): + xnumel = 64 + rnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rbase = tl.arange(0, RBLOCK)[None, :] + x0 = xindex + tmp0 = x0 + tl.store(out_ptr0 + (x0), tmp0, xmask) + tmp1 = 
tl.load(in_ptr0 + (x0), xmask, eviction_policy='evict_last') + tmp11_mean = tl.zeros([XBLOCK, RBLOCK], tl.float32) + tmp11_m2 = tl.zeros([XBLOCK, RBLOCK], tl.float32) + tmp11_weight = tl.zeros([XBLOCK, RBLOCK], tl.float32) + for roffset in range(0, rnumel, RBLOCK): + rindex = roffset + rbase + rmask = rindex < rnumel + r1 = rindex + tmp2 = tl.full([XBLOCK, RBLOCK], 50304, tl.int32) + tmp3 = tmp1 + tmp2 + tmp4 = tmp1 < 0 + tmp5 = tl.where(tmp4, tmp3, tmp1) + tl.device_assert(((0 <= tmp5) & (tmp5 < 50304)) | ~(xmask), "index out of bounds: 0 <= tmp5 < 50304") + tmp7 = tl.load(in_ptr1 + (r1 + (768*tmp5)), rmask & xmask, eviction_policy='evict_last', other=0.0) + tmp8 = tl.load(in_ptr2 + (r1 + (768*tmp0)), rmask & xmask, eviction_policy='evict_last', other=0.0) + tmp9 = tmp7 + tmp8 + tmp10 = tl.broadcast_to(tmp9, [XBLOCK, RBLOCK]) + tmp11_mean_next, tmp11_m2_next, tmp11_weight_next = triton_helpers.welford_reduce( + tmp10, tmp11_mean, tmp11_m2, tmp11_weight, roffset == 0 + ) + tmp11_mean = tl.where(rmask & xmask, tmp11_mean_next, tmp11_mean) + tmp11_m2 = tl.where(rmask & xmask, tmp11_m2_next, tmp11_m2) + tmp11_weight = tl.where(rmask & xmask, tmp11_weight_next, tmp11_weight) + tmp11_tmp, tmp12_tmp, tmp13_tmp = triton_helpers.welford( + tmp11_mean, tmp11_m2, tmp11_weight, 1 + ) + tmp11 = tmp11_tmp[:, None] + tmp12 = tmp12_tmp[:, None] + tmp13 = tmp13_tmp[:, None] + for roffset in range(0, rnumel, RBLOCK): + rindex = roffset + rbase + rmask = rindex < rnumel + r1 = rindex + tmp29 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp31 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp14 = tl.full([XBLOCK, RBLOCK], 50304, tl.int32) + tmp15 = tmp1 + tmp14 + tmp16 = tmp1 < 0 + tmp17 = tl.where(tmp16, tmp15, tmp1) + tl.device_assert(((0 <= tmp17) & (tmp17 < 50304)) | ~(xmask), "index out of bounds: 0 <= tmp17 < 50304") + tmp19 = tl.load(in_ptr1 + (r1 + (768*tmp17)), rmask & xmask, eviction_policy='evict_first', other=0.0) + tmp20 = tl.load(in_ptr2 + (r1 + (768*tmp0)), rmask & xmask, eviction_policy='evict_first', other=0.0) + tmp21 = tmp19 + tmp20 + tmp22 = tmp21 - tmp11 + tmp23 = 768.0 + tmp24 = tmp12 / tmp23 + tmp25 = 1e-05 + tmp26 = tmp24 + tmp25 + tmp27 = libdevice.rsqrt(tmp26) + tmp28 = tmp22 * tmp27 + tmp30 = tmp28 * tmp29 + tmp32 = tmp30 + tmp31 + tl.store(out_ptr3 + (r1 + (768*x0)), tmp28, rmask & xmask) + tl.store(out_ptr4 + (r1 + (768*x0)), tmp32, rmask & xmask) + tmp33 = 768.0 + tmp34 = tmp12 / tmp33 + tmp35 = 1e-05 + tmp36 = tmp34 + tmp35 + tmp37 = libdevice.rsqrt(tmp36) + tmp38 = 0.0013020833333333333 + tmp39 = tmp37 * tmp38 + tl.store(out_ptr5 + (x0), tmp39, xmask) + ''', device_str='cuda') + + import triton + import triton.language as tl + from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph + from torch._C import _cuda_getCurrentRawStream as get_raw_stream + + + # kernel path: /tmp/tmp2ln889l5/ny/cnyd44m4eki6trasporudk5jhoi44nw2xayxtolgkf52e37t4zoc.py + # Source Nodes: [add, layer_norm_1, pos_emb, tok_emb, x_1], Original ATen: [aten.add, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + # add => add + # layer_norm_1 => add_4, add_5, mul_2, mul_3, rsqrt_1, sub_1, var_mean_1 + # pos_emb => embedding_1 + # tok_emb => embedding + # x_1 => add_3 + triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler 
import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*i64', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: 'i32', 12: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 6, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + x0 = xindex + r1 = rindex + tmp0 = tl.load(in_ptr0 + (x0), None, eviction_policy='evict_last') + tmp7 = tl.load(in_ptr2 + (x0), None, eviction_policy='evict_last') + tmp15 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp16 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp42 = tl.load(in_ptr5 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp44 = tl.load(in_ptr6 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp1 = tl.full([RBLOCK], 50304, tl.int32) + tmp2 = tmp0 + tmp1 + tmp3 = tmp0 < 0 + tmp4 = tl.where(tmp3, tmp2, tmp0) + tl.device_assert((0 <= tmp4) & (tmp4 < 50304), "index out of bounds: 0 <= tmp4 < 50304") + tmp6 = tl.load(in_ptr1 + (r1 + (768*tmp4)), rmask, other=0.0) + tmp8 = tl.full([RBLOCK], 1024, tl.int32) + tmp9 = tmp7 + tmp8 + tmp10 = tmp7 < 0 + tmp11 = tl.where(tmp10, tmp9, tmp7) + tl.device_assert((0 <= tmp11) & (tmp11 < 1024), "index out of bounds: 0 <= tmp11 < 1024") + tmp13 = tl.load(in_ptr3 + (r1 + (768*tmp11)), rmask, other=0.0) + tmp14 = tmp6 + tmp13 + tmp17 = tmp15 + tmp16 + tmp18 = tmp14 + tmp17 + tmp19 = tl.broadcast_to(tmp18, [RBLOCK]) + tmp21 = tl.where(rmask, tmp19, 0) + tmp22 = tl.broadcast_to(tmp19, [RBLOCK]) + tmp24 = tl.where(rmask, tmp22, 0) + tmp25 = triton_helpers.promote_to_tensor(tl.sum(tmp24, 0)) + tmp26 = tl.full([1], 768, tl.int32) + tmp27 = tmp26.to(tl.float32) + tmp28 = tmp25 / tmp27 + tmp29 = tmp19 - tmp28 + tmp30 = tmp29 * tmp29 + tmp31 = tl.broadcast_to(tmp30, [RBLOCK]) + tmp33 = tl.where(rmask, tmp31, 0) + 
tmp34 = triton_helpers.promote_to_tensor(tl.sum(tmp33, 0)) + tmp35 = tmp18 - tmp28 + tmp36 = 768.0 + tmp37 = tmp34 / tmp36 + tmp38 = 1e-05 + tmp39 = tmp37 + tmp38 + tmp40 = libdevice.rsqrt(tmp39) + tmp41 = tmp35 * tmp40 + tmp43 = tmp41 * tmp42 + tmp45 = tmp43 + tmp44 + tmp46 = 0.0013020833333333333 + tmp47 = tmp40 * tmp46 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp18, rmask) + tl.store(out_ptr2 + (r1 + (768*x0)), tmp41, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp45, rmask) + tl.store(out_ptr4 + (x0), tmp47, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/kg/ckgkhr4vlxuqepuczwft7i266h4lpr667eoo2cb6w36y3bmg4a4p.py + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh, x_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + # add_2 => add_6 + # add_3 => add_7 + # mul => mul_4 + # mul_1 => mul_5 + # mul_2 => mul_6 + # pow_1 => pow_1 + # tanh => tanh + # x_3 => mul_7 + triton_poi_fused_add_mul_pow_tanh_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_mul_pow_tanh_2', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 196608 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = 0.5 + tmp2 = tmp0 * tmp1 + tmp3 = tmp0 * tmp0 + tmp4 = tmp3 * tmp0 + tmp5 = 0.044715 + tmp6 = tmp4 * tmp5 + tmp7 = tmp0 + tmp6 + tmp8 = 0.7978845608028654 + tmp9 = tmp7 * tmp8 + tmp10 = libdevice.tanh(tmp9) + tmp11 = 1.0 + tmp12 = tmp10 + tmp11 + tmp13 = tmp2 * tmp12 + tl.store(out_ptr0 + (x0), tmp13, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/op/copzf2xr7ackxgbya76mhoxugg5bcf7szouu3u7calkmq6tn64cz.py + # Source Nodes: [layer_norm_2, x_6], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # layer_norm_2 => add_10, add_9, mul_8, mul_9, rsqrt_2, sub_2, var_mean_2 + # x_6 => add_8 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import 
AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: 'i32', 9: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 5, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp28 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp30 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp5 = tl.broadcast_to(tmp4, [RBLOCK]) + tmp7 = tl.where(rmask, tmp5, 0) + tmp8 = tl.broadcast_to(tmp5, [RBLOCK]) + tmp10 = tl.where(rmask, tmp8, 0) + tmp11 = triton_helpers.promote_to_tensor(tl.sum(tmp10, 0)) + tmp12 = tl.full([1], 768, tl.int32) + tmp13 = tmp12.to(tl.float32) + tmp14 = tmp11 / tmp13 + tmp15 = tmp5 - tmp14 + tmp16 = tmp15 * tmp15 + tmp17 = tl.broadcast_to(tmp16, [RBLOCK]) + tmp19 = tl.where(rmask, tmp17, 0) + tmp20 = triton_helpers.promote_to_tensor(tl.sum(tmp19, 0)) + tmp21 = tmp4 - tmp14 + tmp22 = 768.0 + tmp23 = tmp20 / tmp22 + tmp24 = 1e-05 + tmp25 = tmp23 + tmp24 + tmp26 = libdevice.rsqrt(tmp25) + tmp27 = tmp21 * tmp26 + tmp29 = tmp27 * tmp28 + tmp31 = tmp29 + tmp30 + tmp32 = 0.0013020833333333333 + tmp33 = tmp26 * tmp32 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp27, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp31, rmask) + tl.store(out_ptr4 + (x0), tmp33, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ah/cahflggzhsvj3obucpotrc2sxhrjrpacfo22i6ocbqiklyoyzdzy.py + # Source Nodes: [layer_norm_3, x_6, x_7], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # layer_norm_3 => add_12, add_13, mul_10, mul_11, rsqrt_3, 
sub_3, var_mean_3 + # x_6 => add_8 + # x_7 => add_11 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: 'i32', 11: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 7, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, out_ptr2, out_ptr3, out_ptr4, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp5 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp6 = tl.load(in_ptr3 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp32 = tl.load(in_ptr4 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp34 = tl.load(in_ptr5 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp7 = tmp5 + tmp6 + tmp8 = tmp4 + tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = tl.broadcast_to(tmp9, [RBLOCK]) + tmp14 = tl.where(rmask, tmp12, 0) + tmp15 = triton_helpers.promote_to_tensor(tl.sum(tmp14, 0)) + tmp16 = tl.full([1], 768, tl.int32) + tmp17 = tmp16.to(tl.float32) + tmp18 = tmp15 / tmp17 + tmp19 = tmp9 - tmp18 + tmp20 = tmp19 * tmp19 + tmp21 = tl.broadcast_to(tmp20, [RBLOCK]) + tmp23 = tl.where(rmask, tmp21, 0) + tmp24 = triton_helpers.promote_to_tensor(tl.sum(tmp23, 0)) + tmp25 = tmp8 - tmp18 + tmp26 = 768.0 + tmp27 = tmp24 / tmp26 + tmp28 = 1e-05 + tmp29 = tmp27 + tmp28 + tmp30 = libdevice.rsqrt(tmp29) + tmp31 = tmp25 * tmp30 + tmp33 = tmp31 * tmp32 + tmp35 = tmp33 + tmp34 + tmp36 
= 0.0013020833333333333 + tmp37 = tmp30 * tmp36 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp8, rmask) + tl.store(out_ptr2 + (r1 + (768*x0)), tmp31, rmask) + tl.store(out_ptr3 + (r1 + (768*x0)), tmp35, rmask) + tl.store(out_ptr4 + (x0), tmp37, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/zr/czrac7rfezxm2zkcroo5lzwzcd2xr3jtjf3s7p5cgvsqj2nr6rzf.py + # Source Nodes: [x_72, x_73], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + # x_72 => add_96 + # x_73 => add_97, mul_96, rsqrt_24, sub_24, var_mean_24 + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 3, 'num_reduction': 4, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr2, out_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1 + (768*x0)), rmask, other=0.0) + tmp2 = tl.load(in_ptr2 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp3 = tmp1 + tmp2 + tmp4 = tmp0 + tmp3 + tmp5 = tl.broadcast_to(tmp4, [RBLOCK]) + tmp7 = tl.where(rmask, tmp5, 0) + tmp8 = tl.broadcast_to(tmp5, [RBLOCK]) + tmp10 = tl.where(rmask, tmp8, 0) + tmp11 = triton_helpers.promote_to_tensor(tl.sum(tmp10, 0)) + tmp12 = tl.full([1], 768, tl.int32) + tmp13 = tmp12.to(tl.float32) + tmp14 = tmp11 / tmp13 + tmp15 = tmp5 - tmp14 + tmp16 = tmp15 * tmp15 + tmp17 = tl.broadcast_to(tmp16, [RBLOCK]) + tmp19 = tl.where(rmask, tmp17, 0) + tmp20 = triton_helpers.promote_to_tensor(tl.sum(tmp19, 0)) + tmp21 = tmp4 - tmp14 + tmp22 = 768.0 + tmp23 = tmp20 / tmp22 + tmp24 = 1e-05 + tmp25 = tmp23 + tmp24 + tmp26 = libdevice.rsqrt(tmp25) + tmp27 = tmp21 * tmp26 + tmp28 = 0.0013020833333333333 + tmp29 = 
tmp26 * tmp28 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp27, rmask) + tl.store(out_ptr3 + (x0), tmp29, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/hj/chjeqvb72dcyhretu5gdrd5qjthvzyvtudwafwqfm72xofwxb6bo.py + # Source Nodes: [getitem_36], Original ATen: [aten.lift_fresh] + # getitem_36 => full_default + triton_poi_fused_lift_fresh_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1], + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {1: 1}, 'configs': [AttrsDescriptor(divisible_by_16=(0,), equal_to_1=(1,))]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_lift_fresh_6', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 1 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp0 = tl.full([1], -1, tl.int64) + tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/tg/ctgrjv6psw3vl6ccv5fzvtesrskw56ih646u36i3kxh3ljsmtwct.py + # Source Nodes: [getitem_36, x_73], Original ATen: [aten.index, aten.native_layer_norm] + # getitem_36 => index + # x_73 => add_98, mul_97 + triton_poi_fused_index_native_layer_norm_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1024], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_index_native_layer_norm_7', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 
'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (48384 + x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp3 = tl.load(in_ptr2 + (x0), xmask) + tmp2 = tmp0 * tmp1 + tmp4 = tmp2 + tmp3 + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149 = args + args.clear() + assert_size_stride(primals_1, (1, 64), (64, 1)) + assert_size_stride(primals_2, (50304, 768), (768, 1)) + assert_size_stride(primals_3, (1024, 768), (768, 1)) + assert_size_stride(primals_4, (768, ), (1, )) + assert_size_stride(primals_5, (768, ), (1, )) + assert_size_stride(primals_6, (2304, 768), (768, 1)) + assert_size_stride(primals_7, (2304, ), (1, )) + assert_size_stride(primals_8, (768, 768), (768, 1)) + assert_size_stride(primals_9, (768, ), (1, )) + assert_size_stride(primals_10, (768, ), (1, )) + assert_size_stride(primals_11, (768, ), (1, )) + assert_size_stride(primals_12, (3072, 768), (768, 1)) + assert_size_stride(primals_13, (3072, ), (1, )) + assert_size_stride(primals_14, (768, 3072), (3072, 1)) + assert_size_stride(primals_15, (768, 
), (1, )) + assert_size_stride(primals_16, (768, ), (1, )) + assert_size_stride(primals_17, (768, ), (1, )) + assert_size_stride(primals_18, (2304, 768), (768, 1)) + assert_size_stride(primals_19, (2304, ), (1, )) + assert_size_stride(primals_20, (768, 768), (768, 1)) + assert_size_stride(primals_21, (768, ), (1, )) + assert_size_stride(primals_22, (768, ), (1, )) + assert_size_stride(primals_23, (768, ), (1, )) + assert_size_stride(primals_24, (3072, 768), (768, 1)) + assert_size_stride(primals_25, (3072, ), (1, )) + assert_size_stride(primals_26, (768, 3072), (3072, 1)) + assert_size_stride(primals_27, (768, ), (1, )) + assert_size_stride(primals_28, (768, ), (1, )) + assert_size_stride(primals_29, (768, ), (1, )) + assert_size_stride(primals_30, (2304, 768), (768, 1)) + assert_size_stride(primals_31, (2304, ), (1, )) + assert_size_stride(primals_32, (768, 768), (768, 1)) + assert_size_stride(primals_33, (768, ), (1, )) + assert_size_stride(primals_34, (768, ), (1, )) + assert_size_stride(primals_35, (768, ), (1, )) + assert_size_stride(primals_36, (3072, 768), (768, 1)) + assert_size_stride(primals_37, (3072, ), (1, )) + assert_size_stride(primals_38, (768, 3072), (3072, 1)) + assert_size_stride(primals_39, (768, ), (1, )) + assert_size_stride(primals_40, (768, ), (1, )) + assert_size_stride(primals_41, (768, ), (1, )) + assert_size_stride(primals_42, (2304, 768), (768, 1)) + assert_size_stride(primals_43, (2304, ), (1, )) + assert_size_stride(primals_44, (768, 768), (768, 1)) + assert_size_stride(primals_45, (768, ), (1, )) + assert_size_stride(primals_46, (768, ), (1, )) + assert_size_stride(primals_47, (768, ), (1, )) + assert_size_stride(primals_48, (3072, 768), (768, 1)) + assert_size_stride(primals_49, (3072, ), (1, )) + assert_size_stride(primals_50, (768, 3072), (3072, 1)) + assert_size_stride(primals_51, (768, ), (1, )) + assert_size_stride(primals_52, (768, ), (1, )) + assert_size_stride(primals_53, (768, ), (1, )) + assert_size_stride(primals_54, (2304, 768), (768, 1)) + assert_size_stride(primals_55, (2304, ), (1, )) + assert_size_stride(primals_56, (768, 768), (768, 1)) + assert_size_stride(primals_57, (768, ), (1, )) + assert_size_stride(primals_58, (768, ), (1, )) + assert_size_stride(primals_59, (768, ), (1, )) + assert_size_stride(primals_60, (3072, 768), (768, 1)) + assert_size_stride(primals_61, (3072, ), (1, )) + assert_size_stride(primals_62, (768, 3072), (3072, 1)) + assert_size_stride(primals_63, (768, ), (1, )) + assert_size_stride(primals_64, (768, ), (1, )) + assert_size_stride(primals_65, (768, ), (1, )) + assert_size_stride(primals_66, (2304, 768), (768, 1)) + assert_size_stride(primals_67, (2304, ), (1, )) + assert_size_stride(primals_68, (768, 768), (768, 1)) + assert_size_stride(primals_69, (768, ), (1, )) + assert_size_stride(primals_70, (768, ), (1, )) + assert_size_stride(primals_71, (768, ), (1, )) + assert_size_stride(primals_72, (3072, 768), (768, 1)) + assert_size_stride(primals_73, (3072, ), (1, )) + assert_size_stride(primals_74, (768, 3072), (3072, 1)) + assert_size_stride(primals_75, (768, ), (1, )) + assert_size_stride(primals_76, (768, ), (1, )) + assert_size_stride(primals_77, (768, ), (1, )) + assert_size_stride(primals_78, (2304, 768), (768, 1)) + assert_size_stride(primals_79, (2304, ), (1, )) + assert_size_stride(primals_80, (768, 768), (768, 1)) + assert_size_stride(primals_81, (768, ), (1, )) + assert_size_stride(primals_82, (768, ), (1, )) + assert_size_stride(primals_83, (768, ), (1, )) + assert_size_stride(primals_84, (3072, 768), 
(768, 1)) + assert_size_stride(primals_85, (3072, ), (1, )) + assert_size_stride(primals_86, (768, 3072), (3072, 1)) + assert_size_stride(primals_87, (768, ), (1, )) + assert_size_stride(primals_88, (768, ), (1, )) + assert_size_stride(primals_89, (768, ), (1, )) + assert_size_stride(primals_90, (2304, 768), (768, 1)) + assert_size_stride(primals_91, (2304, ), (1, )) + assert_size_stride(primals_92, (768, 768), (768, 1)) + assert_size_stride(primals_93, (768, ), (1, )) + assert_size_stride(primals_94, (768, ), (1, )) + assert_size_stride(primals_95, (768, ), (1, )) + assert_size_stride(primals_96, (3072, 768), (768, 1)) + assert_size_stride(primals_97, (3072, ), (1, )) + assert_size_stride(primals_98, (768, 3072), (3072, 1)) + assert_size_stride(primals_99, (768, ), (1, )) + assert_size_stride(primals_100, (768, ), (1, )) + assert_size_stride(primals_101, (768, ), (1, )) + assert_size_stride(primals_102, (2304, 768), (768, 1)) + assert_size_stride(primals_103, (2304, ), (1, )) + assert_size_stride(primals_104, (768, 768), (768, 1)) + assert_size_stride(primals_105, (768, ), (1, )) + assert_size_stride(primals_106, (768, ), (1, )) + assert_size_stride(primals_107, (768, ), (1, )) + assert_size_stride(primals_108, (3072, 768), (768, 1)) + assert_size_stride(primals_109, (3072, ), (1, )) + assert_size_stride(primals_110, (768, 3072), (3072, 1)) + assert_size_stride(primals_111, (768, ), (1, )) + assert_size_stride(primals_112, (768, ), (1, )) + assert_size_stride(primals_113, (768, ), (1, )) + assert_size_stride(primals_114, (2304, 768), (768, 1)) + assert_size_stride(primals_115, (2304, ), (1, )) + assert_size_stride(primals_116, (768, 768), (768, 1)) + assert_size_stride(primals_117, (768, ), (1, )) + assert_size_stride(primals_118, (768, ), (1, )) + assert_size_stride(primals_119, (768, ), (1, )) + assert_size_stride(primals_120, (3072, 768), (768, 1)) + assert_size_stride(primals_121, (3072, ), (1, )) + assert_size_stride(primals_122, (768, 3072), (3072, 1)) + assert_size_stride(primals_123, (768, ), (1, )) + assert_size_stride(primals_124, (768, ), (1, )) + assert_size_stride(primals_125, (768, ), (1, )) + assert_size_stride(primals_126, (2304, 768), (768, 1)) + assert_size_stride(primals_127, (2304, ), (1, )) + assert_size_stride(primals_128, (768, 768), (768, 1)) + assert_size_stride(primals_129, (768, ), (1, )) + assert_size_stride(primals_130, (768, ), (1, )) + assert_size_stride(primals_131, (768, ), (1, )) + assert_size_stride(primals_132, (3072, 768), (768, 1)) + assert_size_stride(primals_133, (3072, ), (1, )) + assert_size_stride(primals_134, (768, 3072), (3072, 1)) + assert_size_stride(primals_135, (768, ), (1, )) + assert_size_stride(primals_136, (768, ), (1, )) + assert_size_stride(primals_137, (768, ), (1, )) + assert_size_stride(primals_138, (2304, 768), (768, 1)) + assert_size_stride(primals_139, (2304, ), (1, )) + assert_size_stride(primals_140, (768, 768), (768, 1)) + assert_size_stride(primals_141, (768, ), (1, )) + assert_size_stride(primals_142, (768, ), (1, )) + assert_size_stride(primals_143, (768, ), (1, )) + assert_size_stride(primals_144, (3072, 768), (768, 1)) + assert_size_stride(primals_145, (3072, ), (1, )) + assert_size_stride(primals_146, (768, 3072), (3072, 1)) + assert_size_stride(primals_147, (768, ), (1, )) + assert_size_stride(primals_148, (768, ), (1, )) + assert_size_stride(primals_149, (768, ), (1, )) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + buf0 = empty_strided_cuda((64, ), (1, ), torch.int64) + buf4 = empty_strided_cuda((1, 
64, 768), (49152, 768, 1), torch.float32) + buf5 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf284 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [add, arange, layer_norm, pos_emb, tok_emb], Original ATen: [aten.add, aten.arange, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + stream0 = get_raw_stream(0) + triton_red_fused_add_arange_embedding_native_layer_norm_native_layer_norm_backward_0.run(primals_1, primals_2, primals_3, primals_4, primals_5, buf0, buf4, buf5, buf284, 64, 768, grid=grid(64), stream=stream0) + del primals_5 + buf6 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_7, reinterpret_tensor(buf5, (64, 768), (768, 1), 0), reinterpret_tensor(primals_6, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf6) + del primals_7 + # Source Nodes: [y], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf6, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf8 = buf7[0] + buf9 = buf7[1] + buf10 = buf7[2] + buf11 = buf7[3] + del buf7 + buf12 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf8, (64, 768), (768, 1), 0), reinterpret_tensor(primals_8, (768, 768), (1, 768), 0), out=buf12) + buf13 = reinterpret_tensor(buf12, (1, 64, 768), (49152, 768, 1), 0); del buf12 # reuse + buf17 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf18 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf283 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [add, layer_norm_1, pos_emb, tok_emb, x_1], Original ATen: [aten.add, aten.embedding, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_embedding_native_layer_norm_native_layer_norm_backward_1.run(buf13, primals_1, primals_2, buf0, primals_3, primals_9, primals_10, primals_11, buf17, buf18, buf283, 64, 768, grid=grid(64), stream=stream0) + del primals_11 + del primals_3 + del primals_9 + buf19 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_2], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_13, reinterpret_tensor(buf18, (64, 768), (768, 1), 0), reinterpret_tensor(primals_12, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf19) + del primals_13 + buf20 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh, x_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf19, buf20, 196608, grid=grid(196608), stream=stream0) + buf21 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf20, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_14, (3072, 768), (1, 3072), 0), out=buf21) + buf25 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf26 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf282 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_2, x_6], Original ATen: [aten.add, aten.native_layer_norm, 
aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf13, buf21, primals_15, primals_16, primals_17, buf25, buf26, buf282, 64, 768, grid=grid(64), stream=stream0) + del primals_17 + buf27 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_4], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_19, reinterpret_tensor(buf26, (64, 768), (768, 1), 0), reinterpret_tensor(primals_18, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf27) + del primals_19 + # Source Nodes: [y_3], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf28 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf27, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf29 = buf28[0] + buf30 = buf28[1] + buf31 = buf28[2] + buf32 = buf28[3] + del buf28 + buf33 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf29, (64, 768), (768, 1), 0), reinterpret_tensor(primals_20, (768, 768), (1, 768), 0), out=buf33) + buf34 = reinterpret_tensor(buf33, (1, 64, 768), (49152, 768, 1), 0); del buf33 # reuse + buf38 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf39 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf281 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_3, x_6, x_7], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf34, buf13, buf21, primals_15, primals_21, primals_22, primals_23, buf38, buf39, buf281, 64, 768, grid=grid(64), stream=stream0) + del primals_15 + del primals_21 + del primals_23 + buf40 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_8], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_25, reinterpret_tensor(buf39, (64, 768), (768, 1), 0), reinterpret_tensor(primals_24, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf40) + del primals_25 + buf41 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_6, add_7, mul_4, mul_5, mul_6, pow_2, tanh_1, x_9], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf40, buf41, 196608, grid=grid(196608), stream=stream0) + buf42 = buf21; del buf21 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf41, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_26, (3072, 768), (1, 3072), 0), out=buf42) + buf46 = buf13; del buf13 # reuse + buf47 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf280 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_4, x_12], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf34, buf42, primals_27, primals_28, primals_29, buf46, buf47, buf280, 64, 768, grid=grid(64), stream=stream0) + del primals_29 + buf48 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_8], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_31, reinterpret_tensor(buf47, (64, 768), (768, 1), 0), reinterpret_tensor(primals_30, (768, 2304), 
(1, 768), 0), alpha=1, beta=1, out=buf48) + del primals_31 + # Source Nodes: [y_6], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf49 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf48, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf50 = buf49[0] + buf51 = buf49[1] + buf52 = buf49[2] + buf53 = buf49[3] + del buf49 + buf54 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf50, (64, 768), (768, 1), 0), reinterpret_tensor(primals_32, (768, 768), (1, 768), 0), out=buf54) + buf55 = reinterpret_tensor(buf54, (1, 64, 768), (49152, 768, 1), 0); del buf54 # reuse + buf59 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf60 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf279 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_5, x_12, x_13], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf55, buf34, buf42, primals_27, primals_33, primals_34, primals_35, buf59, buf60, buf279, 64, 768, grid=grid(64), stream=stream0) + del primals_27 + del primals_33 + del primals_35 + buf61 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_14], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_37, reinterpret_tensor(buf60, (64, 768), (768, 1), 0), reinterpret_tensor(primals_36, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf61) + del primals_37 + buf62 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_10, add_11, mul_10, mul_8, mul_9, pow_3, tanh_2, x_15], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf61, buf62, 196608, grid=grid(196608), stream=stream0) + buf63 = buf42; del buf42 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf62, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_38, (3072, 768), (1, 3072), 0), out=buf63) + buf67 = buf34; del buf34 # reuse + buf68 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf278 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_6, x_18], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf55, buf63, primals_39, primals_40, primals_41, buf67, buf68, buf278, 64, 768, grid=grid(64), stream=stream0) + del primals_41 + buf69 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_12], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_43, reinterpret_tensor(buf68, (64, 768), (768, 1), 0), reinterpret_tensor(primals_42, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf69) + del primals_43 + # Source Nodes: [y_9], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf70 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf69, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf71 = buf70[0] + buf72 = buf70[1] + 
buf73 = buf70[2] + buf74 = buf70[3] + del buf70 + buf75 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf71, (64, 768), (768, 1), 0), reinterpret_tensor(primals_44, (768, 768), (1, 768), 0), out=buf75) + buf76 = reinterpret_tensor(buf75, (1, 64, 768), (49152, 768, 1), 0); del buf75 # reuse + buf80 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf81 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf277 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_7, x_18, x_19], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf76, buf55, buf63, primals_39, primals_45, primals_46, primals_47, buf80, buf81, buf277, 64, 768, grid=grid(64), stream=stream0) + del primals_39 + del primals_45 + del primals_47 + buf82 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_20], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_49, reinterpret_tensor(buf81, (64, 768), (768, 1), 0), reinterpret_tensor(primals_48, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf82) + del primals_49 + buf83 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_14, add_15, mul_12, mul_13, mul_14, pow_4, tanh_3, x_21], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf82, buf83, 196608, grid=grid(196608), stream=stream0) + buf84 = buf63; del buf63 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf83, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_50, (3072, 768), (1, 3072), 0), out=buf84) + buf88 = buf55; del buf55 # reuse + buf89 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf276 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_8, x_24], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf76, buf84, primals_51, primals_52, primals_53, buf88, buf89, buf276, 64, 768, grid=grid(64), stream=stream0) + del primals_53 + buf90 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_16], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_55, reinterpret_tensor(buf89, (64, 768), (768, 1), 0), reinterpret_tensor(primals_54, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf90) + del primals_55 + # Source Nodes: [y_12], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf91 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf90, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf92 = buf91[0] + buf93 = buf91[1] + buf94 = buf91[2] + buf95 = buf91[3] + del buf91 + buf96 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf92, (64, 768), (768, 1), 0), reinterpret_tensor(primals_56, (768, 768), (1, 768), 0), out=buf96) + buf97 = reinterpret_tensor(buf96, (1, 64, 768), (49152, 768, 1), 0); del buf96 # reuse + buf101 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf102 = 
empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf275 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_9, x_24, x_25], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf97, buf76, buf84, primals_51, primals_57, primals_58, primals_59, buf101, buf102, buf275, 64, 768, grid=grid(64), stream=stream0) + del primals_51 + del primals_57 + del primals_59 + buf103 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_26], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_61, reinterpret_tensor(buf102, (64, 768), (768, 1), 0), reinterpret_tensor(primals_60, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf103) + del primals_61 + buf104 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_18, add_19, mul_16, mul_17, mul_18, pow_5, tanh_4, x_27], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf103, buf104, 196608, grid=grid(196608), stream=stream0) + buf105 = buf84; del buf84 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf104, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_62, (3072, 768), (1, 3072), 0), out=buf105) + buf109 = buf76; del buf76 # reuse + buf110 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf274 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_10, x_30], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf97, buf105, primals_63, primals_64, primals_65, buf109, buf110, buf274, 64, 768, grid=grid(64), stream=stream0) + del primals_65 + buf111 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_20], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_67, reinterpret_tensor(buf110, (64, 768), (768, 1), 0), reinterpret_tensor(primals_66, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf111) + del primals_67 + # Source Nodes: [y_15], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf112 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf111, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf113 = buf112[0] + buf114 = buf112[1] + buf115 = buf112[2] + buf116 = buf112[3] + del buf112 + buf117 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf113, (64, 768), (768, 1), 0), reinterpret_tensor(primals_68, (768, 768), (1, 768), 0), out=buf117) + buf118 = reinterpret_tensor(buf117, (1, 64, 768), (49152, 768, 1), 0); del buf117 # reuse + buf122 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf123 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf273 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_11, x_30, x_31], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf118, buf97, buf105, primals_63, primals_69, primals_70, primals_71, buf122, buf123, buf273, 64, 768, 
grid=grid(64), stream=stream0) + del primals_63 + del primals_69 + del primals_71 + buf124 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_32], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_73, reinterpret_tensor(buf123, (64, 768), (768, 1), 0), reinterpret_tensor(primals_72, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf124) + del primals_73 + buf125 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_22, add_23, mul_20, mul_21, mul_22, pow_6, tanh_5, x_33], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf124, buf125, 196608, grid=grid(196608), stream=stream0) + buf126 = reinterpret_tensor(buf97, (64, 768), (768, 1), 0); del buf97 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf125, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_74, (3072, 768), (1, 3072), 0), out=buf126) + buf130 = reinterpret_tensor(buf105, (1, 64, 768), (49152, 768, 1), 0); del buf105 # reuse + buf131 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf272 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_12, x_36], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf118, buf126, primals_75, primals_76, primals_77, buf130, buf131, buf272, 64, 768, grid=grid(64), stream=stream0) + del primals_77 + buf132 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_24], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_79, reinterpret_tensor(buf131, (64, 768), (768, 1), 0), reinterpret_tensor(primals_78, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf132) + del primals_79 + # Source Nodes: [y_18], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf133 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf132, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf134 = buf133[0] + buf135 = buf133[1] + buf136 = buf133[2] + buf137 = buf133[3] + del buf133 + buf138 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf134, (64, 768), (768, 1), 0), reinterpret_tensor(primals_80, (768, 768), (1, 768), 0), out=buf138) + buf139 = reinterpret_tensor(buf138, (1, 64, 768), (49152, 768, 1), 0); del buf138 # reuse + buf143 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf144 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf271 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_13, x_36, x_37], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf139, buf118, buf126, primals_75, primals_81, primals_82, primals_83, buf143, buf144, buf271, 64, 768, grid=grid(64), stream=stream0) + del primals_75 + del primals_81 + del primals_83 + buf145 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_38], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_85, reinterpret_tensor(buf144, (64, 768), (768, 1), 0), reinterpret_tensor(primals_84, (768, 3072), 
(1, 768), 0), alpha=1, beta=1, out=buf145) + del primals_85 + buf146 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_26, add_27, mul_24, mul_25, mul_26, pow_7, tanh_6, x_39], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf145, buf146, 196608, grid=grid(196608), stream=stream0) + buf147 = buf126; del buf126 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf146, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_86, (3072, 768), (1, 3072), 0), out=buf147) + buf151 = buf118; del buf118 # reuse + buf152 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf270 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_14, x_42], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf139, buf147, primals_87, primals_88, primals_89, buf151, buf152, buf270, 64, 768, grid=grid(64), stream=stream0) + del primals_89 + buf153 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_28], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_91, reinterpret_tensor(buf152, (64, 768), (768, 1), 0), reinterpret_tensor(primals_90, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf153) + del primals_91 + # Source Nodes: [y_21], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf154 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf153, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf155 = buf154[0] + buf156 = buf154[1] + buf157 = buf154[2] + buf158 = buf154[3] + del buf154 + buf159 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf155, (64, 768), (768, 1), 0), reinterpret_tensor(primals_92, (768, 768), (1, 768), 0), out=buf159) + buf160 = reinterpret_tensor(buf159, (1, 64, 768), (49152, 768, 1), 0); del buf159 # reuse + buf164 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf165 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf269 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_15, x_42, x_43], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf160, buf139, buf147, primals_87, primals_93, primals_94, primals_95, buf164, buf165, buf269, 64, 768, grid=grid(64), stream=stream0) + del primals_87 + del primals_93 + del primals_95 + buf166 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_44], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_97, reinterpret_tensor(buf165, (64, 768), (768, 1), 0), reinterpret_tensor(primals_96, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf166) + del primals_97 + buf167 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_30, add_31, mul_28, mul_29, mul_30, pow_8, tanh_7, x_45], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf166, buf167, 196608, grid=grid(196608), stream=stream0) + buf168 = buf147; del buf147 # reuse + # Source 
Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf167, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_98, (3072, 768), (1, 3072), 0), out=buf168) + buf172 = buf139; del buf139 # reuse + buf173 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf268 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_16, x_48], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf160, buf168, primals_99, primals_100, primals_101, buf172, buf173, buf268, 64, 768, grid=grid(64), stream=stream0) + del primals_101 + buf174 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_32], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_103, reinterpret_tensor(buf173, (64, 768), (768, 1), 0), reinterpret_tensor(primals_102, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf174) + del primals_103 + # Source Nodes: [y_24], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf175 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf174, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf176 = buf175[0] + buf177 = buf175[1] + buf178 = buf175[2] + buf179 = buf175[3] + del buf175 + buf180 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf176, (64, 768), (768, 1), 0), reinterpret_tensor(primals_104, (768, 768), (1, 768), 0), out=buf180) + buf181 = reinterpret_tensor(buf180, (1, 64, 768), (49152, 768, 1), 0); del buf180 # reuse + buf185 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf186 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf267 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_17, x_48, x_49], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf181, buf160, buf168, primals_99, primals_105, primals_106, primals_107, buf185, buf186, buf267, 64, 768, grid=grid(64), stream=stream0) + del primals_105 + del primals_107 + del primals_99 + buf187 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_50], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_109, reinterpret_tensor(buf186, (64, 768), (768, 1), 0), reinterpret_tensor(primals_108, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf187) + del primals_109 + buf188 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_34, add_35, mul_32, mul_33, mul_34, pow_9, tanh_8, x_51], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf187, buf188, 196608, grid=grid(196608), stream=stream0) + buf189 = buf168; del buf168 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf188, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_110, (3072, 768), (1, 3072), 0), out=buf189) + buf193 = buf160; del buf160 # reuse + buf194 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf266 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_18, x_54], Original ATen: 
[aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf181, buf189, primals_111, primals_112, primals_113, buf193, buf194, buf266, 64, 768, grid=grid(64), stream=stream0) + del primals_113 + buf195 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_36], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_115, reinterpret_tensor(buf194, (64, 768), (768, 1), 0), reinterpret_tensor(primals_114, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf195) + del primals_115 + # Source Nodes: [y_27], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf196 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf195, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf197 = buf196[0] + buf198 = buf196[1] + buf199 = buf196[2] + buf200 = buf196[3] + del buf196 + buf201 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf197, (64, 768), (768, 1), 0), reinterpret_tensor(primals_116, (768, 768), (1, 768), 0), out=buf201) + buf202 = reinterpret_tensor(buf201, (1, 64, 768), (49152, 768, 1), 0); del buf201 # reuse + buf206 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf207 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf265 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_19, x_54, x_55], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf202, buf181, buf189, primals_111, primals_117, primals_118, primals_119, buf206, buf207, buf265, 64, 768, grid=grid(64), stream=stream0) + del primals_111 + del primals_117 + del primals_119 + buf208 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_56], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_121, reinterpret_tensor(buf207, (64, 768), (768, 1), 0), reinterpret_tensor(primals_120, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf208) + del primals_121 + buf209 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_38, add_39, mul_36, mul_37, mul_38, pow_10, tanh_9, x_57], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf208, buf209, 196608, grid=grid(196608), stream=stream0) + buf210 = buf189; del buf189 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf209, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_122, (3072, 768), (1, 3072), 0), out=buf210) + buf214 = buf181; del buf181 # reuse + buf215 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf264 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_20, x_60], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf202, buf210, primals_123, primals_124, primals_125, buf214, buf215, buf264, 64, 768, grid=grid(64), stream=stream0) + del primals_125 + buf216 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_40], Original ATen: [aten.addmm] + 
extern_kernels.addmm(primals_127, reinterpret_tensor(buf215, (64, 768), (768, 1), 0), reinterpret_tensor(primals_126, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf216) + del primals_127 + # Source Nodes: [y_30], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf217 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf216, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf218 = buf217[0] + buf219 = buf217[1] + buf220 = buf217[2] + buf221 = buf217[3] + del buf217 + buf222 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf218, (64, 768), (768, 1), 0), reinterpret_tensor(primals_128, (768, 768), (1, 768), 0), out=buf222) + buf223 = reinterpret_tensor(buf222, (1, 64, 768), (49152, 768, 1), 0); del buf222 # reuse + buf227 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf228 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf263 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_21, x_60, x_61], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf223, buf202, buf210, primals_123, primals_129, primals_130, primals_131, buf227, buf228, buf263, 64, 768, grid=grid(64), stream=stream0) + del primals_123 + del primals_129 + del primals_131 + buf229 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_62], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_133, reinterpret_tensor(buf228, (64, 768), (768, 1), 0), reinterpret_tensor(primals_132, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf229) + del primals_133 + buf230 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_42, add_43, mul_40, mul_41, mul_42, pow_11, tanh_10, x_63], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf229, buf230, 196608, grid=grid(196608), stream=stream0) + buf231 = buf210; del buf210 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf230, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_134, (3072, 768), (1, 3072), 0), out=buf231) + buf235 = buf202; del buf202 # reuse + buf236 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf262 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_22, x_66], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_3.run(buf223, buf231, primals_135, primals_136, primals_137, buf235, buf236, buf262, 64, 768, grid=grid(64), stream=stream0) + del primals_137 + buf237 = empty_strided_cuda((64, 2304), (2304, 1), torch.float32) + # Source Nodes: [linear_44], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_139, reinterpret_tensor(buf236, (64, 768), (768, 1), 0), reinterpret_tensor(primals_138, (768, 2304), (1, 768), 0), alpha=1, beta=1, out=buf237) + del primals_139 + # Source Nodes: [y_33], Original ATen: [aten._scaled_dot_product_efficient_attention] + buf238 = torch.ops.aten._scaled_dot_product_efficient_attention.default(reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 
1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 1), 768), reinterpret_tensor(buf237, (1, 12, 64, 64), (0, 64, 2304, 1), 1536), None, True, 0.0, True) + buf239 = buf238[0] + buf240 = buf238[1] + buf241 = buf238[2] + buf242 = buf238[3] + del buf238 + buf243 = empty_strided_cuda((64, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf239, (64, 768), (768, 1), 0), reinterpret_tensor(primals_140, (768, 768), (1, 768), 0), out=buf243) + buf244 = reinterpret_tensor(buf243, (1, 64, 768), (49152, 768, 1), 0); del buf243 # reuse + buf248 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf249 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + buf261 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [layer_norm_23, x_66, x_67], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_4.run(buf244, buf223, buf231, primals_135, primals_141, primals_142, primals_143, buf248, buf249, buf261, 64, 768, grid=grid(64), stream=stream0) + del primals_135 + del primals_141 + del primals_143 + buf250 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [x_68], Original ATen: [aten.addmm] + extern_kernels.addmm(primals_145, reinterpret_tensor(buf249, (64, 768), (768, 1), 0), reinterpret_tensor(primals_144, (768, 3072), (1, 768), 0), alpha=1, beta=1, out=buf250) + del primals_145 + buf251 = empty_strided_cuda((1, 64, 3072), (196608, 3072, 1), torch.float32) + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11, x_69], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh] + triton_poi_fused_add_mul_pow_tanh_2.run(buf250, buf251, 196608, grid=grid(196608), stream=stream0) + buf252 = buf231; del buf231 # reuse + # Source Nodes: [], Original ATen: [] + extern_kernels.mm(reinterpret_tensor(buf251, (64, 3072), (3072, 1), 0), reinterpret_tensor(primals_146, (3072, 768), (1, 3072), 0), out=buf252) + buf256 = buf223; del buf223 # reuse + buf260 = empty_strided_cuda((1, 64, 1), (64, 1, 1), torch.float32) + # Source Nodes: [x_72, x_73], Original ATen: [aten.add, aten.native_layer_norm, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_native_layer_norm_backward_5.run(buf244, buf252, primals_147, buf256, buf260, 64, 768, grid=grid(64), stream=stream0) + del buf244 + del buf252 + del primals_147 + buf257 = empty_strided_cuda((1, ), (1, ), torch.int64) + # Source Nodes: [getitem_36], Original ATen: [aten.lift_fresh] + triton_poi_fused_lift_fresh_6.run(buf257, 1, grid=grid(1), stream=stream0) + buf258 = empty_strided_cuda((1, 1, 768), (768, 768, 1), torch.float32) + # Source Nodes: [getitem_36, x_73], Original ATen: [aten.index, aten.native_layer_norm] + triton_poi_fused_index_native_layer_norm_7.run(buf256, primals_148, primals_149, buf258, 768, grid=grid(768), stream=stream0) + del primals_149 + buf259 = empty_strided_cuda((1, 50304), (50304, 1), torch.float32) + # Source Nodes: [logits], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf258, (1, 768), (0, 1), 0), reinterpret_tensor(primals_2, (768, 50304), (1, 768), 0), out=buf259) + return (reinterpret_tensor(buf259, (1, 1, 50304), (50304, 50304, 1), 0), primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, 
primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, reinterpret_tensor(buf0, (1, 64), (64, 1), 0), buf4, reinterpret_tensor(buf5, (64, 768), (768, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf6, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf8, buf9, buf10, buf11, buf17, reinterpret_tensor(buf18, (64, 768), (768, 1), 0), buf19, reinterpret_tensor(buf20, (64, 3072), (3072, 1), 0), buf25, reinterpret_tensor(buf26, (64, 768), (768, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf27, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf29, buf30, buf31, buf32, buf38, reinterpret_tensor(buf39, (64, 768), (768, 1), 0), buf40, reinterpret_tensor(buf41, (64, 3072), (3072, 1), 0), buf46, reinterpret_tensor(buf47, (64, 768), (768, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf48, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf50, buf51, buf52, buf53, buf59, reinterpret_tensor(buf60, (64, 768), (768, 1), 0), buf61, reinterpret_tensor(buf62, (64, 3072), (3072, 1), 0), buf67, reinterpret_tensor(buf68, (64, 768), (768, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf69, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf71, buf72, buf73, buf74, buf80, reinterpret_tensor(buf81, (64, 768), (768, 1), 0), buf82, reinterpret_tensor(buf83, (64, 3072), (3072, 1), 0), buf88, reinterpret_tensor(buf89, (64, 768), (768, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf90, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf92, buf93, buf94, buf95, buf101, reinterpret_tensor(buf102, (64, 768), (768, 1), 0), buf103, reinterpret_tensor(buf104, (64, 3072), (3072, 1), 0), buf109, reinterpret_tensor(buf110, (64, 768), (768, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf111, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf113, buf114, buf115, buf116, buf122, reinterpret_tensor(buf123, (64, 768), (768, 1), 0), buf124, reinterpret_tensor(buf125, (64, 3072), (3072, 1), 0), buf130, reinterpret_tensor(buf131, (64, 768), (768, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf132, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf134, buf135, buf136, buf137, buf143, reinterpret_tensor(buf144, (64, 768), (768, 1), 0), buf145, reinterpret_tensor(buf146, (64, 3072), (3072, 1), 0), buf151, reinterpret_tensor(buf152, (64, 768), (768, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf153, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf155, buf156, buf157, buf158, buf164, reinterpret_tensor(buf165, (64, 768), (768, 1), 0), buf166, reinterpret_tensor(buf167, (64, 3072), (3072, 1), 0), 
buf172, reinterpret_tensor(buf173, (64, 768), (768, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf174, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf176, buf177, buf178, buf179, buf185, reinterpret_tensor(buf186, (64, 768), (768, 1), 0), buf187, reinterpret_tensor(buf188, (64, 3072), (3072, 1), 0), buf193, reinterpret_tensor(buf194, (64, 768), (768, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf195, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf197, buf198, buf199, buf200, buf206, reinterpret_tensor(buf207, (64, 768), (768, 1), 0), buf208, reinterpret_tensor(buf209, (64, 3072), (3072, 1), 0), buf214, reinterpret_tensor(buf215, (64, 768), (768, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf216, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf218, buf219, buf220, buf221, buf227, reinterpret_tensor(buf228, (64, 768), (768, 1), 0), buf229, reinterpret_tensor(buf230, (64, 3072), (3072, 1), 0), buf235, reinterpret_tensor(buf236, (64, 768), (768, 1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 768), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 0), reinterpret_tensor(buf237, (1, 12, 64, 64), (147456, 64, 2304, 1), 1536), buf239, buf240, buf241, buf242, buf248, reinterpret_tensor(buf249, (64, 768), (768, 1), 0), buf250, reinterpret_tensor(buf251, (64, 3072), (3072, 1), 0), buf256, buf257, reinterpret_tensor(buf258, (1, 768), (768, 1), 0), primals_2, buf260, primals_146, primals_144, buf261, primals_140, primals_138, buf262, primals_134, primals_132, buf263, primals_128, primals_126, buf264, primals_122, primals_120, buf265, primals_116, primals_114, buf266, primals_110, primals_108, buf267, primals_104, primals_102, buf268, primals_98, primals_96, buf269, primals_92, primals_90, buf270, primals_86, primals_84, buf271, primals_80, primals_78, buf272, primals_74, primals_72, buf273, primals_68, primals_66, buf274, primals_62, primals_60, buf275, primals_56, primals_54, buf276, primals_50, primals_48, buf277, primals_44, primals_42, buf278, primals_38, primals_36, buf279, primals_32, primals_30, buf280, primals_26, primals_24, buf281, primals_20, primals_18, buf282, primals_14, primals_12, buf283, primals_8, primals_6, buf284, ) + + + def benchmark_compiled_module(times=10, repeat=10): + from torch._dynamo.testing import rand_strided + from torch._inductor.utils import print_performance + primals_1 = rand_strided((1, 64), (64, 1), device='cuda:0', dtype=torch.int64) + primals_2 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_3 = rand_strided((1024, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_4 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_5 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_6 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_7 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_8 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_9 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_10 = rand_strided((768, ), 
(1, ), device='cuda:0', dtype=torch.float32) + primals_11 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_12 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_13 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_14 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_15 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_16 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_17 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_18 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_19 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_20 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_21 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_22 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_23 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_24 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_25 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_26 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_27 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_28 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_29 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_30 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_31 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_32 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_33 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_34 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_35 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_36 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_37 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_38 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_39 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_40 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_41 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_42 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_43 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_44 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_45 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_46 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_47 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_48 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_49 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_50 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_51 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_52 = rand_strided((768, ), (1, 
), device='cuda:0', dtype=torch.float32) + primals_53 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_54 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_55 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_56 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_57 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_58 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_59 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_60 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_61 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_62 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_63 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_64 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_65 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_66 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_67 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_68 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_69 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_70 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_71 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_72 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_73 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_74 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_75 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_76 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_77 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_78 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_79 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_80 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_81 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_82 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_83 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_84 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_85 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_86 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_87 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_88 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_89 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_90 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_91 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_92 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_93 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_94 = rand_strided((768, ), (1, ), 
device='cuda:0', dtype=torch.float32) + primals_95 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_96 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_97 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_98 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_99 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_100 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_101 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_102 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_103 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_104 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_105 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_106 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_107 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_108 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_109 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_110 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_111 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_112 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_113 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_114 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_115 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_116 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_117 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_118 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_119 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_120 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_121 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_122 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_123 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_124 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_125 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_126 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_127 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_128 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_129 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_130 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_131 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_132 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_133 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_134 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_135 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + 
primals_136 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_137 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_138 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_139 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_140 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_141 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_142 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_143 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_144 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + primals_145 = rand_strided((3072, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_146 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + primals_147 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_148 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_149 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + fn = lambda: call([primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:00.675000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "01eed3091dbbef1e669dd3cfc3e8bc47"} + { + "name": "code_gen", + "ts": 1722977760675133.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.675000 4107173 torch/_dynamo/utils.py:824] 
{"chromium_event": {}, "has_payload": "30b06250406c3bf4a74c6efec9b9ba81"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977760675319.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.757000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": "8e46dd4165fe90419697fbe6a8a7e189"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977756402576.5, + "args": { + "key": "fawswmdqdoeabru4cngdomrqdcmfg5ehi5bfp4lz3lpw74xd2r5q", + "cache_state": "miss", + "components": [ + "[7gdnkoxeguoowip7ectpux5j62uq56ccdoktbdshbvoqarzspmh] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149):\n iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)\n unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None\n embedding = torch.ops.aten.embedding.default(primals_2, primals_1)\n embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None\n add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None\n var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True)\n getitem = var_mean[0]\n getitem_1 = var_mean[1]; var_mean = None\n add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None\n rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None\n sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None\n mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None\n mul_1 = torch.ops.aten.mul.Tensor(mul, primals_4)\n add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None\n view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None\n permute = 
torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None\n addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None\n view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None\n split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None\n getitem_2 = split[0]\n getitem_3 = split[1]\n getitem_4 = split[2]; split = None\n view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None\n permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None\n view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None\n permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None\n view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None\n permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None\n _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)\n getitem_5 = _scaled_dot_product_efficient_attention[0]\n getitem_6 = _scaled_dot_product_efficient_attention[1]\n getitem_7 = _scaled_dot_product_efficient_attention[2]\n getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None\n addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None\n view_7 = torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None\n add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None\n var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)\n getitem_9 = var_mean_1[0]\n getitem_10 = var_mean_1[1]; var_mean_1 = None\n add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None\n rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None\n sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None\n mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None\n mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10)\n add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None\n view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None\n permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None\n addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072])\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None\n mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None\n view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None\n permute_7 = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None\n addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None\n view_11 = 
torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None\n add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None\n var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)\n getitem_11 = var_mean_2[0]\n getitem_12 = var_mean_2[1]; var_mean_2 = None\n add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None\n rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None\n sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None\n mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None\n mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16)\n add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None\n view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None\n permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None\n addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None\n view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None\n split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None\n getitem_13 = split_1[0]\n getitem_14 = split_1[1]\n getitem_15 = split_1[2]; split_1 = None\n view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None\n permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None\n view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None\n permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None\n view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None\n permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None\n _scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)\n getitem_16 = _scaled_dot_product_efficient_attention_1[0]\n getitem_17 = _scaled_dot_product_efficient_attention_1[1]\n getitem_18 = _scaled_dot_product_efficient_attention_1[2]\n getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None\n addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None\n view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None\n add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None\n var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)\n getitem_20 = var_mean_3[0]\n getitem_21 = var_mean_3[1]; var_mean_3 = None\n add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None\n rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None\n sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None\n mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None\n mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22)\n add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None\n view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None\n permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None\n addmm_6 = 
torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072])\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None\n mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None\n view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None\n permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None\n addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None\n view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None\n add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None\n var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)\n getitem_22 = var_mean_4[0]\n getitem_23 = var_mean_4[1]; var_mean_4 = None\n add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None\n rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None\n sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None\n mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None\n mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28)\n add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None\n view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 = None\n permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None\n addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None\n view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None\n split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None\n getitem_24 = split_2[0]\n getitem_25 = split_2[1]\n getitem_26 = split_2[2]; split_2 = None\n view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None\n permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None\n view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None\n permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None\n view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None\n permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None\n _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)\n getitem_27 = _scaled_dot_product_efficient_attention_2[0]\n getitem_28 = _scaled_dot_product_efficient_attention_2[1]\n getitem_29 = _scaled_dot_product_efficient_attention_2[2]\n getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None\n addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, 
permute_21); primals_33 = view_30 = None\n view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None\n add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None\n var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)\n getitem_31 = var_mean_5[0]\n getitem_32 = var_mean_5[1]; var_mean_5 = None\n add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None\n rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None\n sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None\n mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None\n mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34)\n add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None\n view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None\n permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None\n addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None\n mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None\n view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None\n permute_23 = torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None\n addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None\n view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None\n add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None\n var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)\n getitem_33 = var_mean_6[0]\n getitem_34 = var_mean_6[1]; var_mean_6 = None\n add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None\n rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None\n sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None\n mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None\n mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40)\n add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None\n view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None\n permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None\n addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None\n view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None\n split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None\n getitem_35 = split_3[0]\n getitem_36 = split_3[1]\n getitem_37 = split_3[2]; split_3 = None\n view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None\n permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None\n view_39 = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None\n permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None\n view_40 = 
torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None\n permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None\n _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)\n getitem_38 = _scaled_dot_product_efficient_attention_3[0]\n getitem_39 = _scaled_dot_product_efficient_attention_3[1]\n getitem_40 = _scaled_dot_product_efficient_attention_3[2]\n getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None\n addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None\n view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None\n add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None\n var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)\n getitem_42 = var_mean_7[0]\n getitem_43 = var_mean_7[1]; var_mean_7 = None\n add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None\n rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None\n sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None\n mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None\n mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46)\n add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 = primals_47 = None\n view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None\n permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None\n addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None\n mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None\n view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None\n permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None\n addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None\n view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None\n add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None\n var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)\n getitem_44 = var_mean_8[0]\n getitem_45 = var_mean_8[1]; var_mean_8 = None\n add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None\n rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None\n sub_8 = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None\n mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None\n mul_33 = 
torch.ops.aten.mul.Tensor(mul_32, primals_52)\n add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None\n view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None\n permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None\n addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None\n view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None\n split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None\n getitem_46 = split_4[0]\n getitem_47 = split_4[1]\n getitem_48 = split_4[2]; split_4 = None\n view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None\n permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None\n view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None\n permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None\n view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None\n permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None\n _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)\n getitem_49 = _scaled_dot_product_efficient_attention_4[0]\n getitem_50 = _scaled_dot_product_efficient_attention_4[1]\n getitem_51 = _scaled_dot_product_efficient_attention_4[2]\n getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None\n addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None\n view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None\n add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None\n var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)\n getitem_53 = var_mean_9[0]\n getitem_54 = var_mean_9[1]; var_mean_9 = None\n add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None\n rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None\n sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None\n mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None\n mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58)\n add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None\n view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None\n permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None\n addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); 
tanh_4 = None\n mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None\n view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None\n permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None\n addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None\n view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None\n add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None\n var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)\n getitem_55 = var_mean_10[0]\n getitem_56 = var_mean_10[1]; var_mean_10 = None\n add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None\n rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None\n sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None\n mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None\n mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64)\n add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None\n view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None\n permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None\n addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None\n view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None\n split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None\n getitem_57 = split_5[0]\n getitem_58 = split_5[1]\n getitem_59 = split_5[2]; split_5 = None\n view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None\n permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = None\n view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None\n permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None\n view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None\n permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None\n _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)\n getitem_60 = _scaled_dot_product_efficient_attention_5[0]\n getitem_61 = _scaled_dot_product_efficient_attention_5[1]\n getitem_62 = _scaled_dot_product_efficient_attention_5[2]\n getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None\n addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None\n view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None\n add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None\n var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)\n getitem_64 = var_mean_11[0]\n getitem_65 = var_mean_11[1]; var_mean_11 = None\n add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None\n rsqrt_11 = torch.ops.aten.rsqrt.default(add_44); add_44 = None\n sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 
= None\n mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None\n mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70)\n add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None\n view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None\n permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None\n addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None\n mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None\n view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None\n permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None\n addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None\n view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None\n add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None\n var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)\n getitem_66 = var_mean_12[0]\n getitem_67 = var_mean_12[1]; var_mean_12 = None\n add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None\n rsqrt_12 = torch.ops.aten.rsqrt.default(add_49); add_49 = None\n sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None\n mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None\n mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76)\n add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None\n view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None\n permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None\n addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None\n view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None\n split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None\n getitem_68 = split_6[0]\n getitem_69 = split_6[1]\n getitem_70 = split_6[2]; split_6 = None\n view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None\n permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None\n view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None\n permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None\n view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None\n permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None\n _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)\n getitem_71 = _scaled_dot_product_efficient_attention_6[0]\n getitem_72 = _scaled_dot_product_efficient_attention_6[1]\n getitem_73 = _scaled_dot_product_efficient_attention_6[2]\n getitem_74 = _scaled_dot_product_efficient_attention_6[3]; 
_scaled_dot_product_efficient_attention_6 = None\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None\n addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None\n view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None\n add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None\n var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)\n getitem_75 = var_mean_13[0]\n getitem_76 = var_mean_13[1]; var_mean_13 = None\n add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None\n rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None\n sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None\n mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None\n mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82)\n add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None\n view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None\n permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None\n addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); view_81 = mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None\n mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None\n view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None\n permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None\n addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None\n view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None\n add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None\n var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)\n getitem_77 = var_mean_14[0]\n getitem_78 = var_mean_14[1]; var_mean_14 = None\n add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None\n rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None\n sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None\n mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None\n mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88)\n add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None\n view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None\n permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None\n addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None\n view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None\n split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None\n getitem_79 = split_7[0]\n 
getitem_80 = split_7[1]\n getitem_81 = split_7[2]; split_7 = None\n view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None\n permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None\n view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None\n permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None\n view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None\n permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None\n _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)\n getitem_82 = _scaled_dot_product_efficient_attention_7[0]\n getitem_83 = _scaled_dot_product_efficient_attention_7[1]\n getitem_84 = _scaled_dot_product_efficient_attention_7[2]\n getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None\n addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None\n view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None\n add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None\n var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)\n getitem_86 = var_mean_15[0]\n getitem_87 = var_mean_15[1]; var_mean_15 = None\n add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None\n rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None\n sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None\n mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None\n mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94)\n add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None\n view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None\n permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None\n addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None\n mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None\n view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None\n permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None\n addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None\n view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None\n add_64 = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None\n var_mean_16 = 
torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)\n getitem_88 = var_mean_16[0]\n getitem_89 = var_mean_16[1]; var_mean_16 = None\n add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None\n rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None\n sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None\n mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None\n mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100)\n add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None\n view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None\n permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None\n addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None\n view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None\n split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None\n getitem_90 = split_8[0]\n getitem_91 = split_8[1]\n getitem_92 = split_8[2]; split_8 = None\n view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None\n permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None\n view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None\n permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None\n view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None\n permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None\n _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)\n getitem_93 = _scaled_dot_product_efficient_attention_8[0]\n getitem_94 = _scaled_dot_product_efficient_attention_8[1]\n getitem_95 = _scaled_dot_product_efficient_attention_8[2]\n getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None\n addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None\n view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None\n add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None\n var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)\n getitem_97 = var_mean_17[0]\n getitem_98 = var_mean_17[1]; var_mean_17 = None\n add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None\n rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None\n sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None\n mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None\n mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106)\n add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None\n view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None\n permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None\n addmm_34 = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None\n view_105 = 
torch.ops.aten.view.default(addmm_34, [1, 64, 3072])\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None\n mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None\n view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None\n permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None\n addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None\n view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None\n add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None\n var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)\n getitem_99 = var_mean_18[0]\n getitem_100 = var_mean_18[1]; var_mean_18 = None\n add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None\n rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None\n sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None\n mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None\n mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112)\n add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None\n view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None\n permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None\n addmm_36 = torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None\n view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None\n split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None\n getitem_101 = split_9[0]\n getitem_102 = split_9[1]\n getitem_103 = split_9[2]; split_9 = None\n view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None\n permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None\n view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None\n permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None\n view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None\n permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None\n _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)\n getitem_104 = _scaled_dot_product_efficient_attention_9[0]\n getitem_105 = _scaled_dot_product_efficient_attention_9[1]\n getitem_106 = _scaled_dot_product_efficient_attention_9[2]\n getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None\n addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 
= view_114 = None\n view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None\n add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None\n var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)\n getitem_108 = var_mean_19[0]\n getitem_109 = var_mean_19[1]; var_mean_19 = None\n add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None\n rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None\n sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None\n mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None\n mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118)\n add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None\n view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None\n permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None\n addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None\n mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None\n view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None\n permute_79 = torch.ops.aten.permute.default(primals_122, [1, 0]); primals_122 = None\n addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None\n view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None\n add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None\n var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)\n getitem_110 = var_mean_20[0]\n getitem_111 = var_mean_20[1]; var_mean_20 = None\n add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None\n rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None\n sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None\n mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None\n mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124)\n add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None\n view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None\n permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None\n addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None\n view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None\n split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None\n getitem_112 = split_10[0]\n getitem_113 = split_10[1]\n getitem_114 = split_10[2]; split_10 = None\n view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None\n permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None\n view_123 = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None\n permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 
3]); view_123 = None\n view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None\n permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None\n _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)\n getitem_115 = _scaled_dot_product_efficient_attention_10[0]\n getitem_116 = _scaled_dot_product_efficient_attention_10[1]\n getitem_117 = _scaled_dot_product_efficient_attention_10[2]\n getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None\n addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None\n view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None\n add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None\n var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)\n getitem_119 = var_mean_21[0]\n getitem_120 = var_mean_21[1]; var_mean_21 = None\n add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None\n rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None\n sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None\n mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None\n mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130)\n add_85 = torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None\n view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None\n permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None\n addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072])\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None\n mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None\n view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None\n permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None\n addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None\n view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None\n add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None\n var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)\n getitem_121 = var_mean_22[0]\n getitem_122 = var_mean_22[1]; var_mean_22 = None\n add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None\n rsqrt_22 = torch.ops.aten.rsqrt.default(add_89); add_89 = None\n sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); 
getitem_122 = None\n mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None\n mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136)\n add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None\n view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None\n permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None\n addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None\n view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None\n split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None\n getitem_123 = split_11[0]\n getitem_124 = split_11[1]\n getitem_125 = split_11[2]; split_11 = None\n view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None\n permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None\n view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None\n permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None\n view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None\n permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None\n _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)\n getitem_126 = _scaled_dot_product_efficient_attention_11[0]\n getitem_127 = _scaled_dot_product_efficient_attention_11[1]\n getitem_128 = _scaled_dot_product_efficient_attention_11[2]\n getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None\n addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None\n view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None\n add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None\n var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)\n getitem_130 = var_mean_23[0]\n getitem_131 = var_mean_23[1]; var_mean_23 = None\n add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None\n rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None\n sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None\n mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None\n mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142)\n add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None\n view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None\n permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None\n addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072])\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None\n 
mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None\n mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None\n view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None\n permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None\n addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None\n view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None\n add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None\n var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)\n getitem_132 = var_mean_24[0]\n getitem_133 = var_mean_24[1]; var_mean_24 = None\n add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None\n rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None\n sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None\n mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None\n mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148)\n add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None\n full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None\n permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None\n view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None\n mm = torch.ops.aten.mm.default(view_144, permute_96)\n view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None\n permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None\n div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None\n permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None\n permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None\n div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None\n permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None\n permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None\n div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None\n permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None\n permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None\n div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None\n permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None\n permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None\n div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None\n permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None\n permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None\n div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None\n permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None\n permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None\n div_6 = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None\n permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None\n permute_165 = 
torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None\n div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None\n permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None\n permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None\n div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None\n permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None\n permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None\n div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None\n permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None\n permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None\n div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None\n permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None\n permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None\n div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None\n permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None\n permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None\n div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None\n permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None\n permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None\n div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None\n permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None\n permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None\n div_14 = torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None\n permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None\n permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None\n div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None\n permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None\n permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None\n div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None\n permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None\n permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None\n div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None\n permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None\n permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None\n div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None\n permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None\n permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None\n div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None\n permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None\n permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None\n div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None\n permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None\n permute_305 = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None\n div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None\n permute_309 = torch.ops.aten.permute.default(permute_13, 
[1, 0]); permute_13 = None\n permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None\n div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None\n permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None\n permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None\n div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None\n permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None\n permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None\n div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None\n return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24)\n \n# To see more debug info, please use 
`graph_module.print_readable()`", + "[yily4oahymyyzyspnyjgkwteqzeiwe4kjdldmy3tmjumziqf7zb] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[31]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[78]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] 
example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[gqb2vspiuwox2kgd2oeoxezbk3ia6ckfpuiqza2vhvphouxwhv5] fx_kwargs[static_input_idxs]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148]", + "[moyibva4eclxkrvb6e7da5ve2knrozngxwjgojtfbwsd4wt762m] fx_kwargs[user_visible_outputs]: {'view_145': None}", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", + "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: 
False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] 
inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + 
"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:00.757000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "383a721c539a14dd80b4ff9efee951aa"} + {"key": "fawswmdqdoeabru4cngdomrqdcmfg5ehi5bfp4lz3lpw74xd2r5q", "cache_state": "miss", "components": ["[7gdnkoxeguoowip7ectpux5j62uq56ccdoktbdshbvoqarzspmh] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, primals_7, primals_8, primals_9, primals_10, primals_11, primals_12, primals_13, primals_14, primals_15, primals_16, primals_17, primals_18, primals_19, primals_20, primals_21, primals_22, primals_23, primals_24, primals_25, primals_26, primals_27, primals_28, primals_29, primals_30, primals_31, primals_32, primals_33, primals_34, primals_35, primals_36, primals_37, primals_38, primals_39, primals_40, primals_41, primals_42, primals_43, primals_44, primals_45, primals_46, primals_47, primals_48, primals_49, primals_50, primals_51, primals_52, primals_53, primals_54, primals_55, primals_56, primals_57, primals_58, primals_59, primals_60, primals_61, primals_62, primals_63, primals_64, primals_65, primals_66, primals_67, primals_68, primals_69, primals_70, primals_71, primals_72, primals_73, primals_74, primals_75, primals_76, primals_77, primals_78, primals_79, primals_80, primals_81, primals_82, primals_83, primals_84, primals_85, primals_86, primals_87, primals_88, primals_89, primals_90, primals_91, primals_92, primals_93, 
primals_94, primals_95, primals_96, primals_97, primals_98, primals_99, primals_100, primals_101, primals_102, primals_103, primals_104, primals_105, primals_106, primals_107, primals_108, primals_109, primals_110, primals_111, primals_112, primals_113, primals_114, primals_115, primals_116, primals_117, primals_118, primals_119, primals_120, primals_121, primals_122, primals_123, primals_124, primals_125, primals_126, primals_127, primals_128, primals_129, primals_130, primals_131, primals_132, primals_133, primals_134, primals_135, primals_136, primals_137, primals_138, primals_139, primals_140, primals_141, primals_142, primals_143, primals_144, primals_145, primals_146, primals_147, primals_148, primals_149):\n iota = torch.ops.prims.iota.default(64, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)\n unsqueeze = torch.ops.aten.unsqueeze.default(iota, 0); iota = None\n embedding = torch.ops.aten.embedding.default(primals_2, primals_1)\n embedding_1 = torch.ops.aten.embedding.default(primals_3, unsqueeze); primals_3 = None\n add = torch.ops.aten.add.Tensor(embedding, embedding_1); embedding = embedding_1 = None\n var_mean = torch.ops.aten.var_mean.correction(add, [2], correction = 0, keepdim = True)\n getitem = var_mean[0]\n getitem_1 = var_mean[1]; var_mean = None\n add_1 = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None\n rsqrt = torch.ops.aten.rsqrt.default(add_1); add_1 = None\n sub = torch.ops.aten.sub.Tensor(add, getitem_1); getitem_1 = None\n mul = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = None\n mul_1 = torch.ops.aten.mul.Tensor(mul, primals_4)\n add_2 = torch.ops.aten.add.Tensor(mul_1, primals_5); mul_1 = primals_5 = None\n view = torch.ops.aten.view.default(add_2, [64, 768]); add_2 = None\n permute = torch.ops.aten.permute.default(primals_6, [1, 0]); primals_6 = None\n addmm = torch.ops.aten.addmm.default(primals_7, view, permute); primals_7 = None\n view_1 = torch.ops.aten.view.default(addmm, [1, 64, 2304]); addmm = None\n split = torch.ops.aten.split.Tensor(view_1, 768, 2); view_1 = None\n getitem_2 = split[0]\n getitem_3 = split[1]\n getitem_4 = split[2]; split = None\n view_2 = torch.ops.aten.view.default(getitem_3, [1, 64, 12, 64]); getitem_3 = None\n permute_1 = torch.ops.aten.permute.default(view_2, [0, 2, 1, 3]); view_2 = None\n view_3 = torch.ops.aten.view.default(getitem_2, [1, 64, 12, 64]); getitem_2 = None\n permute_2 = torch.ops.aten.permute.default(view_3, [0, 2, 1, 3]); view_3 = None\n view_4 = torch.ops.aten.view.default(getitem_4, [1, 64, 12, 64]); getitem_4 = None\n permute_3 = torch.ops.aten.permute.default(view_4, [0, 2, 1, 3]); view_4 = None\n _scaled_dot_product_efficient_attention = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_2, permute_1, permute_3, None, True, 0.0, True)\n getitem_5 = _scaled_dot_product_efficient_attention[0]\n getitem_6 = _scaled_dot_product_efficient_attention[1]\n getitem_7 = _scaled_dot_product_efficient_attention[2]\n getitem_8 = _scaled_dot_product_efficient_attention[3]; _scaled_dot_product_efficient_attention = None\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n permute_5 = torch.ops.aten.permute.default(primals_8, [1, 0]); primals_8 = None\n addmm_1 = torch.ops.aten.addmm.default(primals_9, view_6, permute_5); primals_9 = view_6 = None\n view_7 = 
torch.ops.aten.view.default(addmm_1, [1, 64, 768]); addmm_1 = None\n add_3 = torch.ops.aten.add.Tensor(add, view_7); add = view_7 = None\n var_mean_1 = torch.ops.aten.var_mean.correction(add_3, [2], correction = 0, keepdim = True)\n getitem_9 = var_mean_1[0]\n getitem_10 = var_mean_1[1]; var_mean_1 = None\n add_4 = torch.ops.aten.add.Tensor(getitem_9, 1e-05); getitem_9 = None\n rsqrt_1 = torch.ops.aten.rsqrt.default(add_4); add_4 = None\n sub_1 = torch.ops.aten.sub.Tensor(add_3, getitem_10); getitem_10 = None\n mul_2 = torch.ops.aten.mul.Tensor(sub_1, rsqrt_1); sub_1 = None\n mul_3 = torch.ops.aten.mul.Tensor(mul_2, primals_10)\n add_5 = torch.ops.aten.add.Tensor(mul_3, primals_11); mul_3 = primals_11 = None\n view_8 = torch.ops.aten.view.default(add_5, [64, 768]); add_5 = None\n permute_6 = torch.ops.aten.permute.default(primals_12, [1, 0]); primals_12 = None\n addmm_2 = torch.ops.aten.addmm.default(primals_13, view_8, permute_6); primals_13 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072])\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); view_9 = mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0); tanh = None\n mul_7 = torch.ops.aten.mul.Tensor(mul_4, add_7); mul_4 = add_7 = None\n view_10 = torch.ops.aten.view.default(mul_7, [64, 3072]); mul_7 = None\n permute_7 = torch.ops.aten.permute.default(primals_14, [1, 0]); primals_14 = None\n addmm_3 = torch.ops.aten.addmm.default(primals_15, view_10, permute_7); primals_15 = None\n view_11 = torch.ops.aten.view.default(addmm_3, [1, 64, 768]); addmm_3 = None\n add_8 = torch.ops.aten.add.Tensor(add_3, view_11); add_3 = view_11 = None\n var_mean_2 = torch.ops.aten.var_mean.correction(add_8, [2], correction = 0, keepdim = True)\n getitem_11 = var_mean_2[0]\n getitem_12 = var_mean_2[1]; var_mean_2 = None\n add_9 = torch.ops.aten.add.Tensor(getitem_11, 1e-05); getitem_11 = None\n rsqrt_2 = torch.ops.aten.rsqrt.default(add_9); add_9 = None\n sub_2 = torch.ops.aten.sub.Tensor(add_8, getitem_12); getitem_12 = None\n mul_8 = torch.ops.aten.mul.Tensor(sub_2, rsqrt_2); sub_2 = None\n mul_9 = torch.ops.aten.mul.Tensor(mul_8, primals_16)\n add_10 = torch.ops.aten.add.Tensor(mul_9, primals_17); mul_9 = primals_17 = None\n view_12 = torch.ops.aten.view.default(add_10, [64, 768]); add_10 = None\n permute_8 = torch.ops.aten.permute.default(primals_18, [1, 0]); primals_18 = None\n addmm_4 = torch.ops.aten.addmm.default(primals_19, view_12, permute_8); primals_19 = None\n view_13 = torch.ops.aten.view.default(addmm_4, [1, 64, 2304]); addmm_4 = None\n split_1 = torch.ops.aten.split.Tensor(view_13, 768, 2); view_13 = None\n getitem_13 = split_1[0]\n getitem_14 = split_1[1]\n getitem_15 = split_1[2]; split_1 = None\n view_14 = torch.ops.aten.view.default(getitem_14, [1, 64, 12, 64]); getitem_14 = None\n permute_9 = torch.ops.aten.permute.default(view_14, [0, 2, 1, 3]); view_14 = None\n view_15 = torch.ops.aten.view.default(getitem_13, [1, 64, 12, 64]); getitem_13 = None\n permute_10 = torch.ops.aten.permute.default(view_15, [0, 2, 1, 3]); view_15 = None\n view_16 = torch.ops.aten.view.default(getitem_15, [1, 64, 12, 64]); getitem_15 = None\n permute_11 = torch.ops.aten.permute.default(view_16, [0, 2, 1, 3]); view_16 = None\n 
_scaled_dot_product_efficient_attention_1 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_10, permute_9, permute_11, None, True, 0.0, True)\n getitem_16 = _scaled_dot_product_efficient_attention_1[0]\n getitem_17 = _scaled_dot_product_efficient_attention_1[1]\n getitem_18 = _scaled_dot_product_efficient_attention_1[2]\n getitem_19 = _scaled_dot_product_efficient_attention_1[3]; _scaled_dot_product_efficient_attention_1 = None\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n permute_13 = torch.ops.aten.permute.default(primals_20, [1, 0]); primals_20 = None\n addmm_5 = torch.ops.aten.addmm.default(primals_21, view_18, permute_13); primals_21 = view_18 = None\n view_19 = torch.ops.aten.view.default(addmm_5, [1, 64, 768]); addmm_5 = None\n add_11 = torch.ops.aten.add.Tensor(add_8, view_19); add_8 = view_19 = None\n var_mean_3 = torch.ops.aten.var_mean.correction(add_11, [2], correction = 0, keepdim = True)\n getitem_20 = var_mean_3[0]\n getitem_21 = var_mean_3[1]; var_mean_3 = None\n add_12 = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None\n rsqrt_3 = torch.ops.aten.rsqrt.default(add_12); add_12 = None\n sub_3 = torch.ops.aten.sub.Tensor(add_11, getitem_21); getitem_21 = None\n mul_10 = torch.ops.aten.mul.Tensor(sub_3, rsqrt_3); sub_3 = None\n mul_11 = torch.ops.aten.mul.Tensor(mul_10, primals_22)\n add_13 = torch.ops.aten.add.Tensor(mul_11, primals_23); mul_11 = primals_23 = None\n view_20 = torch.ops.aten.view.default(add_13, [64, 768]); add_13 = None\n permute_14 = torch.ops.aten.permute.default(primals_24, [1, 0]); primals_24 = None\n addmm_6 = torch.ops.aten.addmm.default(primals_25, view_20, permute_14); primals_25 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072])\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); view_21 = mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0); tanh_1 = None\n mul_15 = torch.ops.aten.mul.Tensor(mul_12, add_15); mul_12 = add_15 = None\n view_22 = torch.ops.aten.view.default(mul_15, [64, 3072]); mul_15 = None\n permute_15 = torch.ops.aten.permute.default(primals_26, [1, 0]); primals_26 = None\n addmm_7 = torch.ops.aten.addmm.default(primals_27, view_22, permute_15); primals_27 = None\n view_23 = torch.ops.aten.view.default(addmm_7, [1, 64, 768]); addmm_7 = None\n add_16 = torch.ops.aten.add.Tensor(add_11, view_23); add_11 = view_23 = None\n var_mean_4 = torch.ops.aten.var_mean.correction(add_16, [2], correction = 0, keepdim = True)\n getitem_22 = var_mean_4[0]\n getitem_23 = var_mean_4[1]; var_mean_4 = None\n add_17 = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None\n rsqrt_4 = torch.ops.aten.rsqrt.default(add_17); add_17 = None\n sub_4 = torch.ops.aten.sub.Tensor(add_16, getitem_23); getitem_23 = None\n mul_16 = torch.ops.aten.mul.Tensor(sub_4, rsqrt_4); sub_4 = None\n mul_17 = torch.ops.aten.mul.Tensor(mul_16, primals_28)\n add_18 = torch.ops.aten.add.Tensor(mul_17, primals_29); mul_17 = primals_29 = None\n view_24 = torch.ops.aten.view.default(add_18, [64, 768]); add_18 
= None\n permute_16 = torch.ops.aten.permute.default(primals_30, [1, 0]); primals_30 = None\n addmm_8 = torch.ops.aten.addmm.default(primals_31, view_24, permute_16); primals_31 = None\n view_25 = torch.ops.aten.view.default(addmm_8, [1, 64, 2304]); addmm_8 = None\n split_2 = torch.ops.aten.split.Tensor(view_25, 768, 2); view_25 = None\n getitem_24 = split_2[0]\n getitem_25 = split_2[1]\n getitem_26 = split_2[2]; split_2 = None\n view_26 = torch.ops.aten.view.default(getitem_25, [1, 64, 12, 64]); getitem_25 = None\n permute_17 = torch.ops.aten.permute.default(view_26, [0, 2, 1, 3]); view_26 = None\n view_27 = torch.ops.aten.view.default(getitem_24, [1, 64, 12, 64]); getitem_24 = None\n permute_18 = torch.ops.aten.permute.default(view_27, [0, 2, 1, 3]); view_27 = None\n view_28 = torch.ops.aten.view.default(getitem_26, [1, 64, 12, 64]); getitem_26 = None\n permute_19 = torch.ops.aten.permute.default(view_28, [0, 2, 1, 3]); view_28 = None\n _scaled_dot_product_efficient_attention_2 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_18, permute_17, permute_19, None, True, 0.0, True)\n getitem_27 = _scaled_dot_product_efficient_attention_2[0]\n getitem_28 = _scaled_dot_product_efficient_attention_2[1]\n getitem_29 = _scaled_dot_product_efficient_attention_2[2]\n getitem_30 = _scaled_dot_product_efficient_attention_2[3]; _scaled_dot_product_efficient_attention_2 = None\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n permute_21 = torch.ops.aten.permute.default(primals_32, [1, 0]); primals_32 = None\n addmm_9 = torch.ops.aten.addmm.default(primals_33, view_30, permute_21); primals_33 = view_30 = None\n view_31 = torch.ops.aten.view.default(addmm_9, [1, 64, 768]); addmm_9 = None\n add_19 = torch.ops.aten.add.Tensor(add_16, view_31); add_16 = view_31 = None\n var_mean_5 = torch.ops.aten.var_mean.correction(add_19, [2], correction = 0, keepdim = True)\n getitem_31 = var_mean_5[0]\n getitem_32 = var_mean_5[1]; var_mean_5 = None\n add_20 = torch.ops.aten.add.Tensor(getitem_31, 1e-05); getitem_31 = None\n rsqrt_5 = torch.ops.aten.rsqrt.default(add_20); add_20 = None\n sub_5 = torch.ops.aten.sub.Tensor(add_19, getitem_32); getitem_32 = None\n mul_18 = torch.ops.aten.mul.Tensor(sub_5, rsqrt_5); sub_5 = None\n mul_19 = torch.ops.aten.mul.Tensor(mul_18, primals_34)\n add_21 = torch.ops.aten.add.Tensor(mul_19, primals_35); mul_19 = primals_35 = None\n view_32 = torch.ops.aten.view.default(add_21, [64, 768]); add_21 = None\n permute_22 = torch.ops.aten.permute.default(primals_36, [1, 0]); primals_36 = None\n addmm_10 = torch.ops.aten.addmm.default(primals_37, view_32, permute_22); primals_37 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072])\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); view_33 = mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0); tanh_2 = None\n mul_23 = torch.ops.aten.mul.Tensor(mul_20, add_23); mul_20 = add_23 = None\n view_34 = torch.ops.aten.view.default(mul_23, [64, 3072]); mul_23 = None\n permute_23 = 
torch.ops.aten.permute.default(primals_38, [1, 0]); primals_38 = None\n addmm_11 = torch.ops.aten.addmm.default(primals_39, view_34, permute_23); primals_39 = None\n view_35 = torch.ops.aten.view.default(addmm_11, [1, 64, 768]); addmm_11 = None\n add_24 = torch.ops.aten.add.Tensor(add_19, view_35); add_19 = view_35 = None\n var_mean_6 = torch.ops.aten.var_mean.correction(add_24, [2], correction = 0, keepdim = True)\n getitem_33 = var_mean_6[0]\n getitem_34 = var_mean_6[1]; var_mean_6 = None\n add_25 = torch.ops.aten.add.Tensor(getitem_33, 1e-05); getitem_33 = None\n rsqrt_6 = torch.ops.aten.rsqrt.default(add_25); add_25 = None\n sub_6 = torch.ops.aten.sub.Tensor(add_24, getitem_34); getitem_34 = None\n mul_24 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_6); sub_6 = None\n mul_25 = torch.ops.aten.mul.Tensor(mul_24, primals_40)\n add_26 = torch.ops.aten.add.Tensor(mul_25, primals_41); mul_25 = primals_41 = None\n view_36 = torch.ops.aten.view.default(add_26, [64, 768]); add_26 = None\n permute_24 = torch.ops.aten.permute.default(primals_42, [1, 0]); primals_42 = None\n addmm_12 = torch.ops.aten.addmm.default(primals_43, view_36, permute_24); primals_43 = None\n view_37 = torch.ops.aten.view.default(addmm_12, [1, 64, 2304]); addmm_12 = None\n split_3 = torch.ops.aten.split.Tensor(view_37, 768, 2); view_37 = None\n getitem_35 = split_3[0]\n getitem_36 = split_3[1]\n getitem_37 = split_3[2]; split_3 = None\n view_38 = torch.ops.aten.view.default(getitem_36, [1, 64, 12, 64]); getitem_36 = None\n permute_25 = torch.ops.aten.permute.default(view_38, [0, 2, 1, 3]); view_38 = None\n view_39 = torch.ops.aten.view.default(getitem_35, [1, 64, 12, 64]); getitem_35 = None\n permute_26 = torch.ops.aten.permute.default(view_39, [0, 2, 1, 3]); view_39 = None\n view_40 = torch.ops.aten.view.default(getitem_37, [1, 64, 12, 64]); getitem_37 = None\n permute_27 = torch.ops.aten.permute.default(view_40, [0, 2, 1, 3]); view_40 = None\n _scaled_dot_product_efficient_attention_3 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_26, permute_25, permute_27, None, True, 0.0, True)\n getitem_38 = _scaled_dot_product_efficient_attention_3[0]\n getitem_39 = _scaled_dot_product_efficient_attention_3[1]\n getitem_40 = _scaled_dot_product_efficient_attention_3[2]\n getitem_41 = _scaled_dot_product_efficient_attention_3[3]; _scaled_dot_product_efficient_attention_3 = None\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n permute_29 = torch.ops.aten.permute.default(primals_44, [1, 0]); primals_44 = None\n addmm_13 = torch.ops.aten.addmm.default(primals_45, view_42, permute_29); primals_45 = view_42 = None\n view_43 = torch.ops.aten.view.default(addmm_13, [1, 64, 768]); addmm_13 = None\n add_27 = torch.ops.aten.add.Tensor(add_24, view_43); add_24 = view_43 = None\n var_mean_7 = torch.ops.aten.var_mean.correction(add_27, [2], correction = 0, keepdim = True)\n getitem_42 = var_mean_7[0]\n getitem_43 = var_mean_7[1]; var_mean_7 = None\n add_28 = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None\n rsqrt_7 = torch.ops.aten.rsqrt.default(add_28); add_28 = None\n sub_7 = torch.ops.aten.sub.Tensor(add_27, getitem_43); getitem_43 = None\n mul_26 = torch.ops.aten.mul.Tensor(sub_7, rsqrt_7); sub_7 = None\n mul_27 = torch.ops.aten.mul.Tensor(mul_26, primals_46)\n add_29 = torch.ops.aten.add.Tensor(mul_27, primals_47); mul_27 
= primals_47 = None\n view_44 = torch.ops.aten.view.default(add_29, [64, 768]); add_29 = None\n permute_30 = torch.ops.aten.permute.default(primals_48, [1, 0]); primals_48 = None\n addmm_14 = torch.ops.aten.addmm.default(primals_49, view_44, permute_30); primals_49 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072])\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); view_45 = mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0); tanh_3 = None\n mul_31 = torch.ops.aten.mul.Tensor(mul_28, add_31); mul_28 = add_31 = None\n view_46 = torch.ops.aten.view.default(mul_31, [64, 3072]); mul_31 = None\n permute_31 = torch.ops.aten.permute.default(primals_50, [1, 0]); primals_50 = None\n addmm_15 = torch.ops.aten.addmm.default(primals_51, view_46, permute_31); primals_51 = None\n view_47 = torch.ops.aten.view.default(addmm_15, [1, 64, 768]); addmm_15 = None\n add_32 = torch.ops.aten.add.Tensor(add_27, view_47); add_27 = view_47 = None\n var_mean_8 = torch.ops.aten.var_mean.correction(add_32, [2], correction = 0, keepdim = True)\n getitem_44 = var_mean_8[0]\n getitem_45 = var_mean_8[1]; var_mean_8 = None\n add_33 = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None\n rsqrt_8 = torch.ops.aten.rsqrt.default(add_33); add_33 = None\n sub_8 = torch.ops.aten.sub.Tensor(add_32, getitem_45); getitem_45 = None\n mul_32 = torch.ops.aten.mul.Tensor(sub_8, rsqrt_8); sub_8 = None\n mul_33 = torch.ops.aten.mul.Tensor(mul_32, primals_52)\n add_34 = torch.ops.aten.add.Tensor(mul_33, primals_53); mul_33 = primals_53 = None\n view_48 = torch.ops.aten.view.default(add_34, [64, 768]); add_34 = None\n permute_32 = torch.ops.aten.permute.default(primals_54, [1, 0]); primals_54 = None\n addmm_16 = torch.ops.aten.addmm.default(primals_55, view_48, permute_32); primals_55 = None\n view_49 = torch.ops.aten.view.default(addmm_16, [1, 64, 2304]); addmm_16 = None\n split_4 = torch.ops.aten.split.Tensor(view_49, 768, 2); view_49 = None\n getitem_46 = split_4[0]\n getitem_47 = split_4[1]\n getitem_48 = split_4[2]; split_4 = None\n view_50 = torch.ops.aten.view.default(getitem_47, [1, 64, 12, 64]); getitem_47 = None\n permute_33 = torch.ops.aten.permute.default(view_50, [0, 2, 1, 3]); view_50 = None\n view_51 = torch.ops.aten.view.default(getitem_46, [1, 64, 12, 64]); getitem_46 = None\n permute_34 = torch.ops.aten.permute.default(view_51, [0, 2, 1, 3]); view_51 = None\n view_52 = torch.ops.aten.view.default(getitem_48, [1, 64, 12, 64]); getitem_48 = None\n permute_35 = torch.ops.aten.permute.default(view_52, [0, 2, 1, 3]); view_52 = None\n _scaled_dot_product_efficient_attention_4 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_34, permute_33, permute_35, None, True, 0.0, True)\n getitem_49 = _scaled_dot_product_efficient_attention_4[0]\n getitem_50 = _scaled_dot_product_efficient_attention_4[1]\n getitem_51 = _scaled_dot_product_efficient_attention_4[2]\n getitem_52 = _scaled_dot_product_efficient_attention_4[3]; _scaled_dot_product_efficient_attention_4 = None\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = 
torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n permute_37 = torch.ops.aten.permute.default(primals_56, [1, 0]); primals_56 = None\n addmm_17 = torch.ops.aten.addmm.default(primals_57, view_54, permute_37); primals_57 = view_54 = None\n view_55 = torch.ops.aten.view.default(addmm_17, [1, 64, 768]); addmm_17 = None\n add_35 = torch.ops.aten.add.Tensor(add_32, view_55); add_32 = view_55 = None\n var_mean_9 = torch.ops.aten.var_mean.correction(add_35, [2], correction = 0, keepdim = True)\n getitem_53 = var_mean_9[0]\n getitem_54 = var_mean_9[1]; var_mean_9 = None\n add_36 = torch.ops.aten.add.Tensor(getitem_53, 1e-05); getitem_53 = None\n rsqrt_9 = torch.ops.aten.rsqrt.default(add_36); add_36 = None\n sub_9 = torch.ops.aten.sub.Tensor(add_35, getitem_54); getitem_54 = None\n mul_34 = torch.ops.aten.mul.Tensor(sub_9, rsqrt_9); sub_9 = None\n mul_35 = torch.ops.aten.mul.Tensor(mul_34, primals_58)\n add_37 = torch.ops.aten.add.Tensor(mul_35, primals_59); mul_35 = primals_59 = None\n view_56 = torch.ops.aten.view.default(add_37, [64, 768]); add_37 = None\n permute_38 = torch.ops.aten.permute.default(primals_60, [1, 0]); primals_60 = None\n addmm_18 = torch.ops.aten.addmm.default(primals_61, view_56, permute_38); primals_61 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072])\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); view_57 = mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0); tanh_4 = None\n mul_39 = torch.ops.aten.mul.Tensor(mul_36, add_39); mul_36 = add_39 = None\n view_58 = torch.ops.aten.view.default(mul_39, [64, 3072]); mul_39 = None\n permute_39 = torch.ops.aten.permute.default(primals_62, [1, 0]); primals_62 = None\n addmm_19 = torch.ops.aten.addmm.default(primals_63, view_58, permute_39); primals_63 = None\n view_59 = torch.ops.aten.view.default(addmm_19, [1, 64, 768]); addmm_19 = None\n add_40 = torch.ops.aten.add.Tensor(add_35, view_59); add_35 = view_59 = None\n var_mean_10 = torch.ops.aten.var_mean.correction(add_40, [2], correction = 0, keepdim = True)\n getitem_55 = var_mean_10[0]\n getitem_56 = var_mean_10[1]; var_mean_10 = None\n add_41 = torch.ops.aten.add.Tensor(getitem_55, 1e-05); getitem_55 = None\n rsqrt_10 = torch.ops.aten.rsqrt.default(add_41); add_41 = None\n sub_10 = torch.ops.aten.sub.Tensor(add_40, getitem_56); getitem_56 = None\n mul_40 = torch.ops.aten.mul.Tensor(sub_10, rsqrt_10); sub_10 = None\n mul_41 = torch.ops.aten.mul.Tensor(mul_40, primals_64)\n add_42 = torch.ops.aten.add.Tensor(mul_41, primals_65); mul_41 = primals_65 = None\n view_60 = torch.ops.aten.view.default(add_42, [64, 768]); add_42 = None\n permute_40 = torch.ops.aten.permute.default(primals_66, [1, 0]); primals_66 = None\n addmm_20 = torch.ops.aten.addmm.default(primals_67, view_60, permute_40); primals_67 = None\n view_61 = torch.ops.aten.view.default(addmm_20, [1, 64, 2304]); addmm_20 = None\n split_5 = torch.ops.aten.split.Tensor(view_61, 768, 2); view_61 = None\n getitem_57 = split_5[0]\n getitem_58 = split_5[1]\n getitem_59 = split_5[2]; split_5 = None\n view_62 = torch.ops.aten.view.default(getitem_58, [1, 64, 12, 64]); getitem_58 = None\n permute_41 = torch.ops.aten.permute.default(view_62, [0, 2, 1, 3]); view_62 = 
None\n view_63 = torch.ops.aten.view.default(getitem_57, [1, 64, 12, 64]); getitem_57 = None\n permute_42 = torch.ops.aten.permute.default(view_63, [0, 2, 1, 3]); view_63 = None\n view_64 = torch.ops.aten.view.default(getitem_59, [1, 64, 12, 64]); getitem_59 = None\n permute_43 = torch.ops.aten.permute.default(view_64, [0, 2, 1, 3]); view_64 = None\n _scaled_dot_product_efficient_attention_5 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_42, permute_41, permute_43, None, True, 0.0, True)\n getitem_60 = _scaled_dot_product_efficient_attention_5[0]\n getitem_61 = _scaled_dot_product_efficient_attention_5[1]\n getitem_62 = _scaled_dot_product_efficient_attention_5[2]\n getitem_63 = _scaled_dot_product_efficient_attention_5[3]; _scaled_dot_product_efficient_attention_5 = None\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n permute_45 = torch.ops.aten.permute.default(primals_68, [1, 0]); primals_68 = None\n addmm_21 = torch.ops.aten.addmm.default(primals_69, view_66, permute_45); primals_69 = view_66 = None\n view_67 = torch.ops.aten.view.default(addmm_21, [1, 64, 768]); addmm_21 = None\n add_43 = torch.ops.aten.add.Tensor(add_40, view_67); add_40 = view_67 = None\n var_mean_11 = torch.ops.aten.var_mean.correction(add_43, [2], correction = 0, keepdim = True)\n getitem_64 = var_mean_11[0]\n getitem_65 = var_mean_11[1]; var_mean_11 = None\n add_44 = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None\n rsqrt_11 = torch.ops.aten.rsqrt.default(add_44); add_44 = None\n sub_11 = torch.ops.aten.sub.Tensor(add_43, getitem_65); getitem_65 = None\n mul_42 = torch.ops.aten.mul.Tensor(sub_11, rsqrt_11); sub_11 = None\n mul_43 = torch.ops.aten.mul.Tensor(mul_42, primals_70)\n add_45 = torch.ops.aten.add.Tensor(mul_43, primals_71); mul_43 = primals_71 = None\n view_68 = torch.ops.aten.view.default(add_45, [64, 768]); add_45 = None\n permute_46 = torch.ops.aten.permute.default(primals_72, [1, 0]); primals_72 = None\n addmm_22 = torch.ops.aten.addmm.default(primals_73, view_68, permute_46); primals_73 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072])\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); view_69 = mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0); tanh_5 = None\n mul_47 = torch.ops.aten.mul.Tensor(mul_44, add_47); mul_44 = add_47 = None\n view_70 = torch.ops.aten.view.default(mul_47, [64, 3072]); mul_47 = None\n permute_47 = torch.ops.aten.permute.default(primals_74, [1, 0]); primals_74 = None\n addmm_23 = torch.ops.aten.addmm.default(primals_75, view_70, permute_47); primals_75 = None\n view_71 = torch.ops.aten.view.default(addmm_23, [1, 64, 768]); addmm_23 = None\n add_48 = torch.ops.aten.add.Tensor(add_43, view_71); add_43 = view_71 = None\n var_mean_12 = torch.ops.aten.var_mean.correction(add_48, [2], correction = 0, keepdim = True)\n getitem_66 = var_mean_12[0]\n getitem_67 = var_mean_12[1]; var_mean_12 = None\n add_49 = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None\n rsqrt_12 = 
torch.ops.aten.rsqrt.default(add_49); add_49 = None\n sub_12 = torch.ops.aten.sub.Tensor(add_48, getitem_67); getitem_67 = None\n mul_48 = torch.ops.aten.mul.Tensor(sub_12, rsqrt_12); sub_12 = None\n mul_49 = torch.ops.aten.mul.Tensor(mul_48, primals_76)\n add_50 = torch.ops.aten.add.Tensor(mul_49, primals_77); mul_49 = primals_77 = None\n view_72 = torch.ops.aten.view.default(add_50, [64, 768]); add_50 = None\n permute_48 = torch.ops.aten.permute.default(primals_78, [1, 0]); primals_78 = None\n addmm_24 = torch.ops.aten.addmm.default(primals_79, view_72, permute_48); primals_79 = None\n view_73 = torch.ops.aten.view.default(addmm_24, [1, 64, 2304]); addmm_24 = None\n split_6 = torch.ops.aten.split.Tensor(view_73, 768, 2); view_73 = None\n getitem_68 = split_6[0]\n getitem_69 = split_6[1]\n getitem_70 = split_6[2]; split_6 = None\n view_74 = torch.ops.aten.view.default(getitem_69, [1, 64, 12, 64]); getitem_69 = None\n permute_49 = torch.ops.aten.permute.default(view_74, [0, 2, 1, 3]); view_74 = None\n view_75 = torch.ops.aten.view.default(getitem_68, [1, 64, 12, 64]); getitem_68 = None\n permute_50 = torch.ops.aten.permute.default(view_75, [0, 2, 1, 3]); view_75 = None\n view_76 = torch.ops.aten.view.default(getitem_70, [1, 64, 12, 64]); getitem_70 = None\n permute_51 = torch.ops.aten.permute.default(view_76, [0, 2, 1, 3]); view_76 = None\n _scaled_dot_product_efficient_attention_6 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_50, permute_49, permute_51, None, True, 0.0, True)\n getitem_71 = _scaled_dot_product_efficient_attention_6[0]\n getitem_72 = _scaled_dot_product_efficient_attention_6[1]\n getitem_73 = _scaled_dot_product_efficient_attention_6[2]\n getitem_74 = _scaled_dot_product_efficient_attention_6[3]; _scaled_dot_product_efficient_attention_6 = None\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n permute_53 = torch.ops.aten.permute.default(primals_80, [1, 0]); primals_80 = None\n addmm_25 = torch.ops.aten.addmm.default(primals_81, view_78, permute_53); primals_81 = view_78 = None\n view_79 = torch.ops.aten.view.default(addmm_25, [1, 64, 768]); addmm_25 = None\n add_51 = torch.ops.aten.add.Tensor(add_48, view_79); add_48 = view_79 = None\n var_mean_13 = torch.ops.aten.var_mean.correction(add_51, [2], correction = 0, keepdim = True)\n getitem_75 = var_mean_13[0]\n getitem_76 = var_mean_13[1]; var_mean_13 = None\n add_52 = torch.ops.aten.add.Tensor(getitem_75, 1e-05); getitem_75 = None\n rsqrt_13 = torch.ops.aten.rsqrt.default(add_52); add_52 = None\n sub_13 = torch.ops.aten.sub.Tensor(add_51, getitem_76); getitem_76 = None\n mul_50 = torch.ops.aten.mul.Tensor(sub_13, rsqrt_13); sub_13 = None\n mul_51 = torch.ops.aten.mul.Tensor(mul_50, primals_82)\n add_53 = torch.ops.aten.add.Tensor(mul_51, primals_83); mul_51 = primals_83 = None\n view_80 = torch.ops.aten.view.default(add_53, [64, 768]); add_53 = None\n permute_54 = torch.ops.aten.permute.default(primals_84, [1, 0]); primals_84 = None\n addmm_26 = torch.ops.aten.addmm.default(primals_85, view_80, permute_54); primals_85 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072])\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); 
view_81 = mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0); tanh_6 = None\n mul_55 = torch.ops.aten.mul.Tensor(mul_52, add_55); mul_52 = add_55 = None\n view_82 = torch.ops.aten.view.default(mul_55, [64, 3072]); mul_55 = None\n permute_55 = torch.ops.aten.permute.default(primals_86, [1, 0]); primals_86 = None\n addmm_27 = torch.ops.aten.addmm.default(primals_87, view_82, permute_55); primals_87 = None\n view_83 = torch.ops.aten.view.default(addmm_27, [1, 64, 768]); addmm_27 = None\n add_56 = torch.ops.aten.add.Tensor(add_51, view_83); add_51 = view_83 = None\n var_mean_14 = torch.ops.aten.var_mean.correction(add_56, [2], correction = 0, keepdim = True)\n getitem_77 = var_mean_14[0]\n getitem_78 = var_mean_14[1]; var_mean_14 = None\n add_57 = torch.ops.aten.add.Tensor(getitem_77, 1e-05); getitem_77 = None\n rsqrt_14 = torch.ops.aten.rsqrt.default(add_57); add_57 = None\n sub_14 = torch.ops.aten.sub.Tensor(add_56, getitem_78); getitem_78 = None\n mul_56 = torch.ops.aten.mul.Tensor(sub_14, rsqrt_14); sub_14 = None\n mul_57 = torch.ops.aten.mul.Tensor(mul_56, primals_88)\n add_58 = torch.ops.aten.add.Tensor(mul_57, primals_89); mul_57 = primals_89 = None\n view_84 = torch.ops.aten.view.default(add_58, [64, 768]); add_58 = None\n permute_56 = torch.ops.aten.permute.default(primals_90, [1, 0]); primals_90 = None\n addmm_28 = torch.ops.aten.addmm.default(primals_91, view_84, permute_56); primals_91 = None\n view_85 = torch.ops.aten.view.default(addmm_28, [1, 64, 2304]); addmm_28 = None\n split_7 = torch.ops.aten.split.Tensor(view_85, 768, 2); view_85 = None\n getitem_79 = split_7[0]\n getitem_80 = split_7[1]\n getitem_81 = split_7[2]; split_7 = None\n view_86 = torch.ops.aten.view.default(getitem_80, [1, 64, 12, 64]); getitem_80 = None\n permute_57 = torch.ops.aten.permute.default(view_86, [0, 2, 1, 3]); view_86 = None\n view_87 = torch.ops.aten.view.default(getitem_79, [1, 64, 12, 64]); getitem_79 = None\n permute_58 = torch.ops.aten.permute.default(view_87, [0, 2, 1, 3]); view_87 = None\n view_88 = torch.ops.aten.view.default(getitem_81, [1, 64, 12, 64]); getitem_81 = None\n permute_59 = torch.ops.aten.permute.default(view_88, [0, 2, 1, 3]); view_88 = None\n _scaled_dot_product_efficient_attention_7 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_58, permute_57, permute_59, None, True, 0.0, True)\n getitem_82 = _scaled_dot_product_efficient_attention_7[0]\n getitem_83 = _scaled_dot_product_efficient_attention_7[1]\n getitem_84 = _scaled_dot_product_efficient_attention_7[2]\n getitem_85 = _scaled_dot_product_efficient_attention_7[3]; _scaled_dot_product_efficient_attention_7 = None\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n permute_61 = torch.ops.aten.permute.default(primals_92, [1, 0]); primals_92 = None\n addmm_29 = torch.ops.aten.addmm.default(primals_93, view_90, permute_61); primals_93 = view_90 = None\n view_91 = torch.ops.aten.view.default(addmm_29, [1, 64, 768]); addmm_29 = None\n add_59 = torch.ops.aten.add.Tensor(add_56, view_91); add_56 = view_91 = None\n var_mean_15 = torch.ops.aten.var_mean.correction(add_59, [2], correction = 0, keepdim = True)\n getitem_86 = var_mean_15[0]\n getitem_87 = var_mean_15[1]; 
var_mean_15 = None\n add_60 = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None\n rsqrt_15 = torch.ops.aten.rsqrt.default(add_60); add_60 = None\n sub_15 = torch.ops.aten.sub.Tensor(add_59, getitem_87); getitem_87 = None\n mul_58 = torch.ops.aten.mul.Tensor(sub_15, rsqrt_15); sub_15 = None\n mul_59 = torch.ops.aten.mul.Tensor(mul_58, primals_94)\n add_61 = torch.ops.aten.add.Tensor(mul_59, primals_95); mul_59 = primals_95 = None\n view_92 = torch.ops.aten.view.default(add_61, [64, 768]); add_61 = None\n permute_62 = torch.ops.aten.permute.default(primals_96, [1, 0]); primals_96 = None\n addmm_30 = torch.ops.aten.addmm.default(primals_97, view_92, permute_62); primals_97 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072])\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); view_93 = mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0); tanh_7 = None\n mul_63 = torch.ops.aten.mul.Tensor(mul_60, add_63); mul_60 = add_63 = None\n view_94 = torch.ops.aten.view.default(mul_63, [64, 3072]); mul_63 = None\n permute_63 = torch.ops.aten.permute.default(primals_98, [1, 0]); primals_98 = None\n addmm_31 = torch.ops.aten.addmm.default(primals_99, view_94, permute_63); primals_99 = None\n view_95 = torch.ops.aten.view.default(addmm_31, [1, 64, 768]); addmm_31 = None\n add_64 = torch.ops.aten.add.Tensor(add_59, view_95); add_59 = view_95 = None\n var_mean_16 = torch.ops.aten.var_mean.correction(add_64, [2], correction = 0, keepdim = True)\n getitem_88 = var_mean_16[0]\n getitem_89 = var_mean_16[1]; var_mean_16 = None\n add_65 = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None\n rsqrt_16 = torch.ops.aten.rsqrt.default(add_65); add_65 = None\n sub_16 = torch.ops.aten.sub.Tensor(add_64, getitem_89); getitem_89 = None\n mul_64 = torch.ops.aten.mul.Tensor(sub_16, rsqrt_16); sub_16 = None\n mul_65 = torch.ops.aten.mul.Tensor(mul_64, primals_100)\n add_66 = torch.ops.aten.add.Tensor(mul_65, primals_101); mul_65 = primals_101 = None\n view_96 = torch.ops.aten.view.default(add_66, [64, 768]); add_66 = None\n permute_64 = torch.ops.aten.permute.default(primals_102, [1, 0]); primals_102 = None\n addmm_32 = torch.ops.aten.addmm.default(primals_103, view_96, permute_64); primals_103 = None\n view_97 = torch.ops.aten.view.default(addmm_32, [1, 64, 2304]); addmm_32 = None\n split_8 = torch.ops.aten.split.Tensor(view_97, 768, 2); view_97 = None\n getitem_90 = split_8[0]\n getitem_91 = split_8[1]\n getitem_92 = split_8[2]; split_8 = None\n view_98 = torch.ops.aten.view.default(getitem_91, [1, 64, 12, 64]); getitem_91 = None\n permute_65 = torch.ops.aten.permute.default(view_98, [0, 2, 1, 3]); view_98 = None\n view_99 = torch.ops.aten.view.default(getitem_90, [1, 64, 12, 64]); getitem_90 = None\n permute_66 = torch.ops.aten.permute.default(view_99, [0, 2, 1, 3]); view_99 = None\n view_100 = torch.ops.aten.view.default(getitem_92, [1, 64, 12, 64]); getitem_92 = None\n permute_67 = torch.ops.aten.permute.default(view_100, [0, 2, 1, 3]); view_100 = None\n _scaled_dot_product_efficient_attention_8 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_66, permute_65, permute_67, None, True, 0.0, True)\n getitem_93 = 
_scaled_dot_product_efficient_attention_8[0]\n getitem_94 = _scaled_dot_product_efficient_attention_8[1]\n getitem_95 = _scaled_dot_product_efficient_attention_8[2]\n getitem_96 = _scaled_dot_product_efficient_attention_8[3]; _scaled_dot_product_efficient_attention_8 = None\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n permute_69 = torch.ops.aten.permute.default(primals_104, [1, 0]); primals_104 = None\n addmm_33 = torch.ops.aten.addmm.default(primals_105, view_102, permute_69); primals_105 = view_102 = None\n view_103 = torch.ops.aten.view.default(addmm_33, [1, 64, 768]); addmm_33 = None\n add_67 = torch.ops.aten.add.Tensor(add_64, view_103); add_64 = view_103 = None\n var_mean_17 = torch.ops.aten.var_mean.correction(add_67, [2], correction = 0, keepdim = True)\n getitem_97 = var_mean_17[0]\n getitem_98 = var_mean_17[1]; var_mean_17 = None\n add_68 = torch.ops.aten.add.Tensor(getitem_97, 1e-05); getitem_97 = None\n rsqrt_17 = torch.ops.aten.rsqrt.default(add_68); add_68 = None\n sub_17 = torch.ops.aten.sub.Tensor(add_67, getitem_98); getitem_98 = None\n mul_66 = torch.ops.aten.mul.Tensor(sub_17, rsqrt_17); sub_17 = None\n mul_67 = torch.ops.aten.mul.Tensor(mul_66, primals_106)\n add_69 = torch.ops.aten.add.Tensor(mul_67, primals_107); mul_67 = primals_107 = None\n view_104 = torch.ops.aten.view.default(add_69, [64, 768]); add_69 = None\n permute_70 = torch.ops.aten.permute.default(primals_108, [1, 0]); primals_108 = None\n addmm_34 = torch.ops.aten.addmm.default(primals_109, view_104, permute_70); primals_109 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072])\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); view_105 = mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0); tanh_8 = None\n mul_71 = torch.ops.aten.mul.Tensor(mul_68, add_71); mul_68 = add_71 = None\n view_106 = torch.ops.aten.view.default(mul_71, [64, 3072]); mul_71 = None\n permute_71 = torch.ops.aten.permute.default(primals_110, [1, 0]); primals_110 = None\n addmm_35 = torch.ops.aten.addmm.default(primals_111, view_106, permute_71); primals_111 = None\n view_107 = torch.ops.aten.view.default(addmm_35, [1, 64, 768]); addmm_35 = None\n add_72 = torch.ops.aten.add.Tensor(add_67, view_107); add_67 = view_107 = None\n var_mean_18 = torch.ops.aten.var_mean.correction(add_72, [2], correction = 0, keepdim = True)\n getitem_99 = var_mean_18[0]\n getitem_100 = var_mean_18[1]; var_mean_18 = None\n add_73 = torch.ops.aten.add.Tensor(getitem_99, 1e-05); getitem_99 = None\n rsqrt_18 = torch.ops.aten.rsqrt.default(add_73); add_73 = None\n sub_18 = torch.ops.aten.sub.Tensor(add_72, getitem_100); getitem_100 = None\n mul_72 = torch.ops.aten.mul.Tensor(sub_18, rsqrt_18); sub_18 = None\n mul_73 = torch.ops.aten.mul.Tensor(mul_72, primals_112)\n add_74 = torch.ops.aten.add.Tensor(mul_73, primals_113); mul_73 = primals_113 = None\n view_108 = torch.ops.aten.view.default(add_74, [64, 768]); add_74 = None\n permute_72 = torch.ops.aten.permute.default(primals_114, [1, 0]); primals_114 = None\n addmm_36 = 
torch.ops.aten.addmm.default(primals_115, view_108, permute_72); primals_115 = None\n view_109 = torch.ops.aten.view.default(addmm_36, [1, 64, 2304]); addmm_36 = None\n split_9 = torch.ops.aten.split.Tensor(view_109, 768, 2); view_109 = None\n getitem_101 = split_9[0]\n getitem_102 = split_9[1]\n getitem_103 = split_9[2]; split_9 = None\n view_110 = torch.ops.aten.view.default(getitem_102, [1, 64, 12, 64]); getitem_102 = None\n permute_73 = torch.ops.aten.permute.default(view_110, [0, 2, 1, 3]); view_110 = None\n view_111 = torch.ops.aten.view.default(getitem_101, [1, 64, 12, 64]); getitem_101 = None\n permute_74 = torch.ops.aten.permute.default(view_111, [0, 2, 1, 3]); view_111 = None\n view_112 = torch.ops.aten.view.default(getitem_103, [1, 64, 12, 64]); getitem_103 = None\n permute_75 = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None\n _scaled_dot_product_efficient_attention_9 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_74, permute_73, permute_75, None, True, 0.0, True)\n getitem_104 = _scaled_dot_product_efficient_attention_9[0]\n getitem_105 = _scaled_dot_product_efficient_attention_9[1]\n getitem_106 = _scaled_dot_product_efficient_attention_9[2]\n getitem_107 = _scaled_dot_product_efficient_attention_9[3]; _scaled_dot_product_efficient_attention_9 = None\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n permute_77 = torch.ops.aten.permute.default(primals_116, [1, 0]); primals_116 = None\n addmm_37 = torch.ops.aten.addmm.default(primals_117, view_114, permute_77); primals_117 = view_114 = None\n view_115 = torch.ops.aten.view.default(addmm_37, [1, 64, 768]); addmm_37 = None\n add_75 = torch.ops.aten.add.Tensor(add_72, view_115); add_72 = view_115 = None\n var_mean_19 = torch.ops.aten.var_mean.correction(add_75, [2], correction = 0, keepdim = True)\n getitem_108 = var_mean_19[0]\n getitem_109 = var_mean_19[1]; var_mean_19 = None\n add_76 = torch.ops.aten.add.Tensor(getitem_108, 1e-05); getitem_108 = None\n rsqrt_19 = torch.ops.aten.rsqrt.default(add_76); add_76 = None\n sub_19 = torch.ops.aten.sub.Tensor(add_75, getitem_109); getitem_109 = None\n mul_74 = torch.ops.aten.mul.Tensor(sub_19, rsqrt_19); sub_19 = None\n mul_75 = torch.ops.aten.mul.Tensor(mul_74, primals_118)\n add_77 = torch.ops.aten.add.Tensor(mul_75, primals_119); mul_75 = primals_119 = None\n view_116 = torch.ops.aten.view.default(add_77, [64, 768]); add_77 = None\n permute_78 = torch.ops.aten.permute.default(primals_120, [1, 0]); primals_120 = None\n addmm_38 = torch.ops.aten.addmm.default(primals_121, view_116, permute_78); primals_121 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072])\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); view_117 = mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0); tanh_9 = None\n mul_79 = torch.ops.aten.mul.Tensor(mul_76, add_79); mul_76 = add_79 = None\n view_118 = torch.ops.aten.view.default(mul_79, [64, 3072]); mul_79 = None\n permute_79 = torch.ops.aten.permute.default(primals_122, [1, 
0]); primals_122 = None\n addmm_39 = torch.ops.aten.addmm.default(primals_123, view_118, permute_79); primals_123 = None\n view_119 = torch.ops.aten.view.default(addmm_39, [1, 64, 768]); addmm_39 = None\n add_80 = torch.ops.aten.add.Tensor(add_75, view_119); add_75 = view_119 = None\n var_mean_20 = torch.ops.aten.var_mean.correction(add_80, [2], correction = 0, keepdim = True)\n getitem_110 = var_mean_20[0]\n getitem_111 = var_mean_20[1]; var_mean_20 = None\n add_81 = torch.ops.aten.add.Tensor(getitem_110, 1e-05); getitem_110 = None\n rsqrt_20 = torch.ops.aten.rsqrt.default(add_81); add_81 = None\n sub_20 = torch.ops.aten.sub.Tensor(add_80, getitem_111); getitem_111 = None\n mul_80 = torch.ops.aten.mul.Tensor(sub_20, rsqrt_20); sub_20 = None\n mul_81 = torch.ops.aten.mul.Tensor(mul_80, primals_124)\n add_82 = torch.ops.aten.add.Tensor(mul_81, primals_125); mul_81 = primals_125 = None\n view_120 = torch.ops.aten.view.default(add_82, [64, 768]); add_82 = None\n permute_80 = torch.ops.aten.permute.default(primals_126, [1, 0]); primals_126 = None\n addmm_40 = torch.ops.aten.addmm.default(primals_127, view_120, permute_80); primals_127 = None\n view_121 = torch.ops.aten.view.default(addmm_40, [1, 64, 2304]); addmm_40 = None\n split_10 = torch.ops.aten.split.Tensor(view_121, 768, 2); view_121 = None\n getitem_112 = split_10[0]\n getitem_113 = split_10[1]\n getitem_114 = split_10[2]; split_10 = None\n view_122 = torch.ops.aten.view.default(getitem_113, [1, 64, 12, 64]); getitem_113 = None\n permute_81 = torch.ops.aten.permute.default(view_122, [0, 2, 1, 3]); view_122 = None\n view_123 = torch.ops.aten.view.default(getitem_112, [1, 64, 12, 64]); getitem_112 = None\n permute_82 = torch.ops.aten.permute.default(view_123, [0, 2, 1, 3]); view_123 = None\n view_124 = torch.ops.aten.view.default(getitem_114, [1, 64, 12, 64]); getitem_114 = None\n permute_83 = torch.ops.aten.permute.default(view_124, [0, 2, 1, 3]); view_124 = None\n _scaled_dot_product_efficient_attention_10 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_82, permute_81, permute_83, None, True, 0.0, True)\n getitem_115 = _scaled_dot_product_efficient_attention_10[0]\n getitem_116 = _scaled_dot_product_efficient_attention_10[1]\n getitem_117 = _scaled_dot_product_efficient_attention_10[2]\n getitem_118 = _scaled_dot_product_efficient_attention_10[3]; _scaled_dot_product_efficient_attention_10 = None\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n permute_85 = torch.ops.aten.permute.default(primals_128, [1, 0]); primals_128 = None\n addmm_41 = torch.ops.aten.addmm.default(primals_129, view_126, permute_85); primals_129 = view_126 = None\n view_127 = torch.ops.aten.view.default(addmm_41, [1, 64, 768]); addmm_41 = None\n add_83 = torch.ops.aten.add.Tensor(add_80, view_127); add_80 = view_127 = None\n var_mean_21 = torch.ops.aten.var_mean.correction(add_83, [2], correction = 0, keepdim = True)\n getitem_119 = var_mean_21[0]\n getitem_120 = var_mean_21[1]; var_mean_21 = None\n add_84 = torch.ops.aten.add.Tensor(getitem_119, 1e-05); getitem_119 = None\n rsqrt_21 = torch.ops.aten.rsqrt.default(add_84); add_84 = None\n sub_21 = torch.ops.aten.sub.Tensor(add_83, getitem_120); getitem_120 = None\n mul_82 = torch.ops.aten.mul.Tensor(sub_21, rsqrt_21); sub_21 = None\n mul_83 = torch.ops.aten.mul.Tensor(mul_82, primals_130)\n add_85 = 
torch.ops.aten.add.Tensor(mul_83, primals_131); mul_83 = primals_131 = None\n view_128 = torch.ops.aten.view.default(add_85, [64, 768]); add_85 = None\n permute_86 = torch.ops.aten.permute.default(primals_132, [1, 0]); primals_132 = None\n addmm_42 = torch.ops.aten.addmm.default(primals_133, view_128, permute_86); primals_133 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072])\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); view_129 = mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0); tanh_10 = None\n mul_87 = torch.ops.aten.mul.Tensor(mul_84, add_87); mul_84 = add_87 = None\n view_130 = torch.ops.aten.view.default(mul_87, [64, 3072]); mul_87 = None\n permute_87 = torch.ops.aten.permute.default(primals_134, [1, 0]); primals_134 = None\n addmm_43 = torch.ops.aten.addmm.default(primals_135, view_130, permute_87); primals_135 = None\n view_131 = torch.ops.aten.view.default(addmm_43, [1, 64, 768]); addmm_43 = None\n add_88 = torch.ops.aten.add.Tensor(add_83, view_131); add_83 = view_131 = None\n var_mean_22 = torch.ops.aten.var_mean.correction(add_88, [2], correction = 0, keepdim = True)\n getitem_121 = var_mean_22[0]\n getitem_122 = var_mean_22[1]; var_mean_22 = None\n add_89 = torch.ops.aten.add.Tensor(getitem_121, 1e-05); getitem_121 = None\n rsqrt_22 = torch.ops.aten.rsqrt.default(add_89); add_89 = None\n sub_22 = torch.ops.aten.sub.Tensor(add_88, getitem_122); getitem_122 = None\n mul_88 = torch.ops.aten.mul.Tensor(sub_22, rsqrt_22); sub_22 = None\n mul_89 = torch.ops.aten.mul.Tensor(mul_88, primals_136)\n add_90 = torch.ops.aten.add.Tensor(mul_89, primals_137); mul_89 = primals_137 = None\n view_132 = torch.ops.aten.view.default(add_90, [64, 768]); add_90 = None\n permute_88 = torch.ops.aten.permute.default(primals_138, [1, 0]); primals_138 = None\n addmm_44 = torch.ops.aten.addmm.default(primals_139, view_132, permute_88); primals_139 = None\n view_133 = torch.ops.aten.view.default(addmm_44, [1, 64, 2304]); addmm_44 = None\n split_11 = torch.ops.aten.split.Tensor(view_133, 768, 2); view_133 = None\n getitem_123 = split_11[0]\n getitem_124 = split_11[1]\n getitem_125 = split_11[2]; split_11 = None\n view_134 = torch.ops.aten.view.default(getitem_124, [1, 64, 12, 64]); getitem_124 = None\n permute_89 = torch.ops.aten.permute.default(view_134, [0, 2, 1, 3]); view_134 = None\n view_135 = torch.ops.aten.view.default(getitem_123, [1, 64, 12, 64]); getitem_123 = None\n permute_90 = torch.ops.aten.permute.default(view_135, [0, 2, 1, 3]); view_135 = None\n view_136 = torch.ops.aten.view.default(getitem_125, [1, 64, 12, 64]); getitem_125 = None\n permute_91 = torch.ops.aten.permute.default(view_136, [0, 2, 1, 3]); view_136 = None\n _scaled_dot_product_efficient_attention_11 = torch.ops.aten._scaled_dot_product_efficient_attention.default(permute_90, permute_89, permute_91, None, True, 0.0, True)\n getitem_126 = _scaled_dot_product_efficient_attention_11[0]\n getitem_127 = _scaled_dot_product_efficient_attention_11[1]\n getitem_128 = _scaled_dot_product_efficient_attention_11[2]\n getitem_129 = _scaled_dot_product_efficient_attention_11[3]; _scaled_dot_product_efficient_attention_11 = None\n permute_92 = 
torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n permute_93 = torch.ops.aten.permute.default(primals_140, [1, 0]); primals_140 = None\n addmm_45 = torch.ops.aten.addmm.default(primals_141, view_138, permute_93); primals_141 = view_138 = None\n view_139 = torch.ops.aten.view.default(addmm_45, [1, 64, 768]); addmm_45 = None\n add_91 = torch.ops.aten.add.Tensor(add_88, view_139); add_88 = view_139 = None\n var_mean_23 = torch.ops.aten.var_mean.correction(add_91, [2], correction = 0, keepdim = True)\n getitem_130 = var_mean_23[0]\n getitem_131 = var_mean_23[1]; var_mean_23 = None\n add_92 = torch.ops.aten.add.Tensor(getitem_130, 1e-05); getitem_130 = None\n rsqrt_23 = torch.ops.aten.rsqrt.default(add_92); add_92 = None\n sub_23 = torch.ops.aten.sub.Tensor(add_91, getitem_131); getitem_131 = None\n mul_90 = torch.ops.aten.mul.Tensor(sub_23, rsqrt_23); sub_23 = None\n mul_91 = torch.ops.aten.mul.Tensor(mul_90, primals_142)\n add_93 = torch.ops.aten.add.Tensor(mul_91, primals_143); mul_91 = primals_143 = None\n view_140 = torch.ops.aten.view.default(add_93, [64, 768]); add_93 = None\n permute_94 = torch.ops.aten.permute.default(primals_144, [1, 0]); primals_144 = None\n addmm_46 = torch.ops.aten.addmm.default(primals_145, view_140, permute_94); primals_145 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072])\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); view_141 = mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0); tanh_11 = None\n mul_95 = torch.ops.aten.mul.Tensor(mul_92, add_95); mul_92 = add_95 = None\n view_142 = torch.ops.aten.view.default(mul_95, [64, 3072]); mul_95 = None\n permute_95 = torch.ops.aten.permute.default(primals_146, [1, 0]); primals_146 = None\n addmm_47 = torch.ops.aten.addmm.default(primals_147, view_142, permute_95); primals_147 = None\n view_143 = torch.ops.aten.view.default(addmm_47, [1, 64, 768]); addmm_47 = None\n add_96 = torch.ops.aten.add.Tensor(add_91, view_143); add_91 = view_143 = None\n var_mean_24 = torch.ops.aten.var_mean.correction(add_96, [2], correction = 0, keepdim = True)\n getitem_132 = var_mean_24[0]\n getitem_133 = var_mean_24[1]; var_mean_24 = None\n add_97 = torch.ops.aten.add.Tensor(getitem_132, 1e-05); getitem_132 = None\n rsqrt_24 = torch.ops.aten.rsqrt.default(add_97); add_97 = None\n sub_24 = torch.ops.aten.sub.Tensor(add_96, getitem_133); add_96 = getitem_133 = None\n mul_96 = torch.ops.aten.mul.Tensor(sub_24, rsqrt_24); sub_24 = None\n mul_97 = torch.ops.aten.mul.Tensor(mul_96, primals_148)\n add_98 = torch.ops.aten.add.Tensor(mul_97, primals_149); mul_97 = primals_149 = None\n full_default = torch.ops.aten.full.default([1], -1, dtype = torch.int64, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index = torch.ops.aten.index.Tensor(add_98, [None, full_default]); add_98 = None\n permute_96 = torch.ops.aten.permute.default(primals_2, [1, 0]); primals_2 = None\n view_144 = torch.ops.aten.view.default(index, [1, 768]); index = None\n mm = torch.ops.aten.mm.default(view_144, 
permute_96)\n view_145 = torch.ops.aten.view.default(mm, [1, 1, 50304]); mm = None\n permute_99 = torch.ops.aten.permute.default(permute_96, [1, 0]); permute_96 = None\n div = torch.ops.aten.div.Tensor(rsqrt_24, 768); rsqrt_24 = None\n permute_101 = torch.ops.aten.permute.default(permute_95, [1, 0]); permute_95 = None\n permute_105 = torch.ops.aten.permute.default(permute_94, [1, 0]); permute_94 = None\n div_1 = torch.ops.aten.div.Tensor(rsqrt_23, 768); rsqrt_23 = None\n permute_109 = torch.ops.aten.permute.default(permute_93, [1, 0]); permute_93 = None\n permute_117 = torch.ops.aten.permute.default(permute_88, [1, 0]); permute_88 = None\n div_2 = torch.ops.aten.div.Tensor(rsqrt_22, 768); rsqrt_22 = None\n permute_121 = torch.ops.aten.permute.default(permute_87, [1, 0]); permute_87 = None\n permute_125 = torch.ops.aten.permute.default(permute_86, [1, 0]); permute_86 = None\n div_3 = torch.ops.aten.div.Tensor(rsqrt_21, 768); rsqrt_21 = None\n permute_129 = torch.ops.aten.permute.default(permute_85, [1, 0]); permute_85 = None\n permute_137 = torch.ops.aten.permute.default(permute_80, [1, 0]); permute_80 = None\n div_4 = torch.ops.aten.div.Tensor(rsqrt_20, 768); rsqrt_20 = None\n permute_141 = torch.ops.aten.permute.default(permute_79, [1, 0]); permute_79 = None\n permute_145 = torch.ops.aten.permute.default(permute_78, [1, 0]); permute_78 = None\n div_5 = torch.ops.aten.div.Tensor(rsqrt_19, 768); rsqrt_19 = None\n permute_149 = torch.ops.aten.permute.default(permute_77, [1, 0]); permute_77 = None\n permute_157 = torch.ops.aten.permute.default(permute_72, [1, 0]); permute_72 = None\n div_6 = torch.ops.aten.div.Tensor(rsqrt_18, 768); rsqrt_18 = None\n permute_161 = torch.ops.aten.permute.default(permute_71, [1, 0]); permute_71 = None\n permute_165 = torch.ops.aten.permute.default(permute_70, [1, 0]); permute_70 = None\n div_7 = torch.ops.aten.div.Tensor(rsqrt_17, 768); rsqrt_17 = None\n permute_169 = torch.ops.aten.permute.default(permute_69, [1, 0]); permute_69 = None\n permute_177 = torch.ops.aten.permute.default(permute_64, [1, 0]); permute_64 = None\n div_8 = torch.ops.aten.div.Tensor(rsqrt_16, 768); rsqrt_16 = None\n permute_181 = torch.ops.aten.permute.default(permute_63, [1, 0]); permute_63 = None\n permute_185 = torch.ops.aten.permute.default(permute_62, [1, 0]); permute_62 = None\n div_9 = torch.ops.aten.div.Tensor(rsqrt_15, 768); rsqrt_15 = None\n permute_189 = torch.ops.aten.permute.default(permute_61, [1, 0]); permute_61 = None\n permute_197 = torch.ops.aten.permute.default(permute_56, [1, 0]); permute_56 = None\n div_10 = torch.ops.aten.div.Tensor(rsqrt_14, 768); rsqrt_14 = None\n permute_201 = torch.ops.aten.permute.default(permute_55, [1, 0]); permute_55 = None\n permute_205 = torch.ops.aten.permute.default(permute_54, [1, 0]); permute_54 = None\n div_11 = torch.ops.aten.div.Tensor(rsqrt_13, 768); rsqrt_13 = None\n permute_209 = torch.ops.aten.permute.default(permute_53, [1, 0]); permute_53 = None\n permute_217 = torch.ops.aten.permute.default(permute_48, [1, 0]); permute_48 = None\n div_12 = torch.ops.aten.div.Tensor(rsqrt_12, 768); rsqrt_12 = None\n permute_221 = torch.ops.aten.permute.default(permute_47, [1, 0]); permute_47 = None\n permute_225 = torch.ops.aten.permute.default(permute_46, [1, 0]); permute_46 = None\n div_13 = torch.ops.aten.div.Tensor(rsqrt_11, 768); rsqrt_11 = None\n permute_229 = torch.ops.aten.permute.default(permute_45, [1, 0]); permute_45 = None\n permute_237 = torch.ops.aten.permute.default(permute_40, [1, 0]); permute_40 = None\n div_14 = 
torch.ops.aten.div.Tensor(rsqrt_10, 768); rsqrt_10 = None\n permute_241 = torch.ops.aten.permute.default(permute_39, [1, 0]); permute_39 = None\n permute_245 = torch.ops.aten.permute.default(permute_38, [1, 0]); permute_38 = None\n div_15 = torch.ops.aten.div.Tensor(rsqrt_9, 768); rsqrt_9 = None\n permute_249 = torch.ops.aten.permute.default(permute_37, [1, 0]); permute_37 = None\n permute_257 = torch.ops.aten.permute.default(permute_32, [1, 0]); permute_32 = None\n div_16 = torch.ops.aten.div.Tensor(rsqrt_8, 768); rsqrt_8 = None\n permute_261 = torch.ops.aten.permute.default(permute_31, [1, 0]); permute_31 = None\n permute_265 = torch.ops.aten.permute.default(permute_30, [1, 0]); permute_30 = None\n div_17 = torch.ops.aten.div.Tensor(rsqrt_7, 768); rsqrt_7 = None\n permute_269 = torch.ops.aten.permute.default(permute_29, [1, 0]); permute_29 = None\n permute_277 = torch.ops.aten.permute.default(permute_24, [1, 0]); permute_24 = None\n div_18 = torch.ops.aten.div.Tensor(rsqrt_6, 768); rsqrt_6 = None\n permute_281 = torch.ops.aten.permute.default(permute_23, [1, 0]); permute_23 = None\n permute_285 = torch.ops.aten.permute.default(permute_22, [1, 0]); permute_22 = None\n div_19 = torch.ops.aten.div.Tensor(rsqrt_5, 768); rsqrt_5 = None\n permute_289 = torch.ops.aten.permute.default(permute_21, [1, 0]); permute_21 = None\n permute_297 = torch.ops.aten.permute.default(permute_16, [1, 0]); permute_16 = None\n div_20 = torch.ops.aten.div.Tensor(rsqrt_4, 768); rsqrt_4 = None\n permute_301 = torch.ops.aten.permute.default(permute_15, [1, 0]); permute_15 = None\n permute_305 = torch.ops.aten.permute.default(permute_14, [1, 0]); permute_14 = None\n div_21 = torch.ops.aten.div.Tensor(rsqrt_3, 768); rsqrt_3 = None\n permute_309 = torch.ops.aten.permute.default(permute_13, [1, 0]); permute_13 = None\n permute_317 = torch.ops.aten.permute.default(permute_8, [1, 0]); permute_8 = None\n div_22 = torch.ops.aten.div.Tensor(rsqrt_2, 768); rsqrt_2 = None\n permute_321 = torch.ops.aten.permute.default(permute_7, [1, 0]); permute_7 = None\n permute_325 = torch.ops.aten.permute.default(permute_6, [1, 0]); permute_6 = None\n div_23 = torch.ops.aten.div.Tensor(rsqrt_1, 768); rsqrt_1 = None\n permute_329 = torch.ops.aten.permute.default(permute_5, [1, 0]); permute_5 = None\n permute_337 = torch.ops.aten.permute.default(permute, [1, 0]); permute = None\n div_24 = torch.ops.aten.div.Tensor(rsqrt, 768); rsqrt = None\n return (view_145, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, 
view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24)\n \n# To see more debug info, please use `graph_module.print_readable()`", "[yily4oahymyyzyspnyjgkwteqzeiwe4kjdldmy3tmjumziqf7zb] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[43]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 
3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[gqb2vspiuwox2kgd2oeoxezbk3ia6ckfpuiqza2vhvphouxwhv5] fx_kwargs[static_input_idxs]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148]", "[moyibva4eclxkrvb6e7da5ve2knrozngxwjgojtfbwsd4wt762m] fx_kwargs[user_visible_outputs]: {'view_145': None}", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] 
system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", 
"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", 
"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", 
"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]} +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "38efd6f35d8d14d5151e15991476a39a"} + { + "name": "inductor_compile", + "ts": 1722977760761464.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "8cfe0861d6e86b59ec82f05bd122f742"} + { + "name": "compile_fx_inner", + "ts": 1722977760761566.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.761000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": 
"730fc8316778651443b74058f01841e6"} + { + "name": "compile_fx..fw_compiler_base", + "ts": 1722977760761706.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fbafd3f40aba7f04b0dafd48adfab1af"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977760764617.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "886313546297de520526a20c4afcc203"} + { + "name": "backend_compile", + "ts": 1722977760764839.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:00.764000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "535d95fac8886817ed3f41158501e8fe"} + { + "name": "OutputGraph.call_user_compiler", + "ts": 1722977760764914.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.004000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "634e39b71251a8c4dda8e4e18aea5906"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod) + | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328) + | | +- GuardManager: source=L['mod'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod'].__dict__) + | | | +- DictSubclassGuardManager: source=L['mod']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | +- DictSubclassGuardManager: source=L['mod']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | +- GuardManager: source=L['mod'].config, accessed_by=DictGetItemGuardAccessor(config) + | | | | +- TYPE_MATCH: ___check_type_id(L['mod'].config, 94206531296000) + | | | | +- GuardManager: source=L['mod'].config.block_size, accessed_by=GetAttrGuardAccessor(block_size) + | | | | | +- EQUALS_MATCH: L['mod'].config.block_size == 1024 + | | | +- GuardManager: source=L['mod']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | +- DICT_LENGTH: len(L['mod']._modules) == 2 + | | | | +- GuardManager: source=L['mod']._modules['transformer'], accessed_by=DictGetItemGuardAccessor(transformer) + | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer'], 94206198915872) + | | | | | +- GuardManager: source=L['mod']._modules['transformer'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules) == 5 + | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'], accessed_by=DictGetItemGuardAccessor(wte) + | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['wte'], 94206200083792) + | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['wte'].__dict__) + | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].sparse, 
accessed_by=DictGetItemGuardAccessor(sparse)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].sparse, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].max_norm, accessed_by=DictGetItemGuardAccessor(max_norm)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].max_norm, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].norm_type, accessed_by=DictGetItemGuardAccessor(norm_type)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['wte'].norm_type == 2.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['wte']._parameters) == 1
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['wte']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[50304, 768], stride=[768, 1])
+ | | | | | | | | | | | +- OBJECT_ALIASING: L['mod']._modules['transformer']._modules['wte']._parameters['weight'] is L['mod']._modules['lm_head']._parameters['weight']
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['cloned_inputs'][0], L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], L['mod']._modules['transformer']._modules['wte']._parameters['weight'], L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'],
L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], 
L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight'], L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'])
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].padding_idx, accessed_by=DictGetItemGuardAccessor(padding_idx)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].padding_idx, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wte'].scale_grad_by_freq, accessed_by=DictGetItemGuardAccessor(scale_grad_by_freq)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wte'].scale_grad_by_freq, 94206128801376)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'], accessed_by=DictGetItemGuardAccessor(wpe)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['wpe'], 94206200083792)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['wpe'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].sparse, accessed_by=DictGetItemGuardAccessor(sparse)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].sparse, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].max_norm, accessed_by=DictGetItemGuardAccessor(max_norm)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].max_norm, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].norm_type, accessed_by=DictGetItemGuardAccessor(norm_type)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['wpe'].norm_type == 2.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['wpe']._parameters) == 1
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['wpe']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1024, 768], stride=[768, 1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].padding_idx, accessed_by=DictGetItemGuardAccessor(padding_idx)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].padding_idx, 94206128752608)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['wpe'].scale_grad_by_freq, accessed_by=DictGetItemGuardAccessor(scale_grad_by_freq)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['wpe'].scale_grad_by_freq, 94206128801376)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'], accessed_by=DictGetItemGuardAccessor(drop)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['drop'], 94206199111456)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['drop'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['drop'].p == 0.0
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['drop'].inplace, 94206128801376)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['drop'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['drop'].training, 94206128801408)
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h'], accessed_by=DictGetItemGuardAccessor(h)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h'], 94206198914912)
+ | | | | | | | | +- LENGTH_CHECK: len(L['mod']._modules['transformer']._modules['h']) == 12
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DictGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | +- KeyValueManager pair at index=0
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[0]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[0] == '0'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not
L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | |
| | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['0']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['0']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=1 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[1] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[1] == '1' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], 
stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].n_head == 12 + | | | | | | | | | | | | 
| | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | 
| +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules) == 
3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: 
len(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['1']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=2 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[2] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[2] == '2' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', 
L['mod']._modules['transformer']._modules['h']._modules['2'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_pre_hooks, 
accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not 
L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | 
| | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['2']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=3 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[3] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[3] == '3' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- 
EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | 
| | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', 
L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'], 
94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'].__dict__, 
accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'].__dict__, 
accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['3']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['3']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=4 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[4] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[4] == '4' + | | | | | | | | | | | +- ValueManager: GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not 
L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: 
___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | 
| +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: 
check_tensor(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['4']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['4']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=5 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[5] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[5] == '5' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- 
GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | 
| +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | 
| | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: 
___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: 
___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].inplace, 
accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['5']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['5']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=6 + | | | | | 
| | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[6] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[6] == '6' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | 
| | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | 
+- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['6']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['6']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=7 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[7] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[7] == '7' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + 
| | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].__dict__) + | | | | | | 
| | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | 
| | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | 
+- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | 
| | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['ln_2']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'], 94206531294080) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules) == 3 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: 
L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._modules['dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['7']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['7']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['7']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=8 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[8] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[8] == '8' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- 
DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters, 
accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout) + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'], 94206199111456) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p) + | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].p == 0.0 + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].training, 
accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._modules['resid_dropout'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn'].training, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._parameters + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['attn']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['weight'], Parameter, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['8']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['8']._backward_pre_hooks
+ | | | | | | | | | | +- KeyValueManager pair at index=9
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[9]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[9] == '9'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['9']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['9']._backward_pre_hooks
+ | | | | | | | | | | +- KeyValueManager pair at index=10
+ | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[10]
+ | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[10] == '10'
+ | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']
+ | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10'], 94206531295040)
+ | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10'].__dict__)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules) == 4
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_1']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'], 94206531293120)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].flash, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_embd == 768
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].n_head == 12
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].dropout == 0.0
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules) == 4
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | |
| | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._modules['mlp']._backward_pre_hooks + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._parameters + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._backward_hooks + | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['10']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['10']._backward_pre_hooks + | | | | | | | | | | +- KeyValueManager pair at index=11 + | | | | | | | | | | | +- KeyManager: GuardManager: source=list(L['mod']._modules['transformer']._modules['h']._modules.keys())[11] + | | | | | | | | | | | | +- EQUALS_MATCH: list(L['mod']._modules['transformer']._modules['h']._modules.keys())[11] == '11' + | | | | | | | | | | | +- ValueManager: GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11'] + | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11'], 94206531295040) + | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11'].__dict__) + | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules) == 4 + | | | | | | | | | | | | | | +- GuardManager: 
source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'], accessed_by=DictGetItemGuardAccessor(ln_1) + | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'], 94206531292160) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters) == 2 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias) + | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_hooks + | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_1']._backward_pre_hooks + | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'], accessed_by=DictGetItemGuardAccessor(attn) + | | | | | | | | | | | | | | | +- 
TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'], 94206531293120) + | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].__dict__) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].flash, accessed_by=DictGetItemGuardAccessor(flash) + | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].flash, 94206128801408) + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_embd, accessed_by=DictGetItemGuardAccessor(n_embd) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_embd == 768 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_head, accessed_by=DictGetItemGuardAccessor(n_head) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].n_head == 12 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].dropout, accessed_by=DictGetItemGuardAccessor(dropout) + | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].dropout == 0.0 + | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules, accessed_by=DictGetItemGuardAccessor(_modules) + | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules) == 4 + | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'], accessed_by=DictGetItemGuardAccessor(c_attn) + | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'], 94206198956688) + | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'].__dict__, accessed_by=GetGenericDictGuardAccessor + | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn'].__dict__) + | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters) + | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters) == 2 + | | | | | | | | | | | | | | | | | | | | +- GuardManager: 
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_attn']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['attn_dropout'], accessed_by=DictGetItemGuardAccessor(attn_dropout)
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'], accessed_by=DictGetItemGuardAccessor(resid_dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._modules['resid_dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['attn']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'], accessed_by=DictGetItemGuardAccessor(ln_2)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'], 94206531292160)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters) == 2
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['ln_2']._backward_pre_hooks
+ | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'], accessed_by=DictGetItemGuardAccessor(mlp)
+ | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'], 94206531294080)
+ | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp'].__dict__)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules, accessed_by=DictGetItemGuardAccessor(_modules)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules) == 3
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'], accessed_by=DictGetItemGuardAccessor(c_fc)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_fc']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'], accessed_by=DictGetItemGuardAccessor(c_proj)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'], 94206198956688)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters) == 2
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['c_proj']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'], accessed_by=DictGetItemGuardAccessor(dropout)
+ | | | | | | | | | | | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'], 94206199111456)
+ | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | | | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].__dict__)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].p, accessed_by=DictGetItemGuardAccessor(p)
+ | | | | | | | | | | | | | | | | | | | | +- EQUALS_MATCH: L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].p == 0.0
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].inplace, accessed_by=DictGetItemGuardAccessor(inplace)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].inplace, 94206128801376)
+ | | | | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].training, accessed_by=DictGetItemGuardAccessor(training)
+ | | | | | | | | | | | | | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._modules['dropout'].training, 94206128801408)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._parameters
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_hooks
+ | | | | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._modules['mlp']._backward_pre_hooks
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._parameters
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._backward_hooks
+ | | | | | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['h']._modules['11']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['h']._modules['11']._backward_pre_hooks
+ | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f'], accessed_by=DictGetItemGuardAccessor(ln_f)
+ | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['transformer']._modules['ln_f'], 94206531292160)
+ | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['transformer']._modules['ln_f'].__dict__)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['transformer']._modules['ln_f']._parameters) == 2
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['ln_f']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | | | | +- TENSOR_MATCH: check_tensor(L['mod']._modules['transformer']._modules['ln_f']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1])
+ | | | | | | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._forward_hooks, accessed_by=DictGetItemGuardAccessor(_forward_hooks)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['ln_f']._backward_hooks
+ | | | | | | | | | +- DictSubclassGuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._forward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_forward_pre_hooks)
+ | | | | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._modules['ln_f']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._modules['ln_f']._backward_pre_hooks
+ | | | | | | +- GuardManager: source=L['mod']._modules['transformer']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | +- DICT_LENGTH: not L['mod']._modules['transformer']._parameters
+ | | | | +- GuardManager: source=L['mod']._modules['lm_head'], accessed_by=DictGetItemGuardAccessor(lm_head)
+ | | | | | +- TYPE_MATCH: ___check_type_id(L['mod']._modules['lm_head'], 94206198956688)
+ | | | | | +- GuardManager: source=L['mod']._modules['lm_head'].__dict__, accessed_by=GetGenericDictGuardAccessor
+ | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['mod']._modules['lm_head'].__dict__)
+ | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | | | | +- DICT_LENGTH: len(L['mod']._modules['lm_head']._parameters) == 2
+ | | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters['weight'], accessed_by=DictGetItemGuardAccessor(weight)
+ | | | | | | | | +- OBJECT_ALIASING: L['mod']._modules['transformer']._modules['wte']._parameters['weight'] is L['mod']._modules['lm_head']._parameters['weight']
+ | | | | | | | +- GuardManager: source=L['mod']._modules['lm_head']._parameters['bias'], accessed_by=DictGetItemGuardAccessor(bias)
+ | | | | | | | | +- ID_MATCH: ___check_obj_id(L['mod']._modules['lm_head']._parameters['bias'], 94206128752608)
+ | | | +- GuardManager: source=L['mod']._parameters, accessed_by=DictGetItemGuardAccessor(_parameters)
+ | | | | +- DICT_LENGTH: not L['mod']._parameters
+ | | | +- GuardManager: source=L['mod']._backward_hooks, accessed_by=DictGetItemGuardAccessor(_backward_hooks)
+ | | | | +- DICT_LENGTH: not L['mod']._backward_hooks
+ | | | +- GuardManager: source=L['mod']._backward_pre_hooks, accessed_by=DictGetItemGuardAccessor(_backward_pre_hooks)
+ | | | | +- DICT_LENGTH: not L['mod']._backward_pre_hooks
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+ | | +- GuardManager: source=L['self'].autocast, accessed_by=GetAttrGuardAccessor(autocast)
+ | | | +- ID_MATCH: ___check_obj_id(L['self'].autocast, 94206129614704)
+ | | +- GuardManager: source=L['self'].autocast_arg, accessed_by=GetAttrGuardAccessor(autocast_arg)
+ | | | +- DICT_LENGTH: not L['self'].autocast_arg
+ | +- GuardManager: source=L['cloned_inputs'], accessed_by=DictGetItemGuardAccessor(cloned_inputs)
+ | | +- TYPE_MATCH: ___check_type_id(L['cloned_inputs'], 94206128766016)
+ | | +- LENGTH_CHECK: len(L['cloned_inputs']) == 1
+ | | +- GuardManager: source=L['cloned_inputs'][0], accessed_by=ListGetItemGuardAccessor(0)
+ | | | +- TENSOR_MATCH: check_tensor(L['cloned_inputs'][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int64, device=0, requires_grad=False, size=[1, 64], stride=[64, 1])
+ | | | +- NO_HASATTR: hasattr(L['cloned_inputs'][0], '_dynamo_dynamic_indices') == False
+ | | | +- NO_TENSOR_ALIASING
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['__builtins_dict___7'], accessed_by=DictGetItemGuardAccessor(__builtins_dict___7)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['dict'], accessed_by=DictGetItemGuardAccessor(dict)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['dict'], 94206128762464)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['iter'], accessed_by=DictGetItemGuardAccessor(iter)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['iter'], 140565189726576)
+ | | | +- GuardManager: source=G['__builtins_dict___7']['isinstance'], accessed_by=DictGetItemGuardAccessor(isinstance)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___7']['isinstance'], 140565189726416)
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_linear)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'], 140563315432704)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F, accessed_by=GetAttrGuardAccessor(F)
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_module)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_module'], 140563346519712)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_pre_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_pre_hooks)
+ | | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks
+ | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_sparse)
+ | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'], 140563269489408)
+ | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F, accessed_by=GetAttrGuardAccessor(F)
+ | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F, 140563315434864)
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_linear'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F
+ | | | | +- OBJECT_ALIASING:
G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is 
G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.linear, accessed_by=GetAttrGuardAccessor(linear) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.linear, 140563483168176) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.dropout, accessed_by=GetAttrGuardAccessor(dropout) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.dropout, 140563303880096) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.embedding, accessed_by=GetAttrGuardAccessor(embedding) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.embedding, 140563303884704) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.layer_norm, accessed_by=GetAttrGuardAccessor(layer_norm) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.layer_norm, 140563303885712) + | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.scaled_dot_product_attention, accessed_by=GetAttrGuardAccessor(scaled_dot_product_attention) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_sparse'].F.scaled_dot_product_attention, 140563482417552) + | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_dropout) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_dropout'], 140563269207600) + | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'].F, accessed_by=GetAttrGuardAccessor(F) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is 
G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F + | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_container'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_nn_dot_modules_dot_container) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_container'], 140563268955776) + | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'], accessed_by=DictGetItemGuardAccessor(__import_torchbenchmark_dot_models_dot_nanogpt_dot_model) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'], 140561618535104) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math, accessed_by=GetAttrGuardAccessor(math) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math, 140565183540704) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.pi, accessed_by=GetAttrGuardAccessor(pi) + | | | | | +- EQUALS_MATCH: G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.pi == 3.141592653589793 + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt) + | | | | | +- ID_MATCH: 
___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].math.sqrt, 140565184131408) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch, accessed_by=GetAttrGuardAccessor(torch) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch, 140565184683664) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn, accessed_by=GetAttrGuardAccessor(nn) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn, 140563346511472) + | | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional, accessed_by=GetAttrGuardAccessor(functional) + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.nn.functional + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.pow, accessed_by=GetAttrGuardAccessor(pow) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.pow, 140565181125936) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.long, accessed_by=GetAttrGuardAccessor(long) + | | | | | +- EQUALS_MATCH: G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.long == torch.int64 + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.tanh, accessed_by=GetAttrGuardAccessor(tanh) + | | 
| | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.tanh, 140565181101168) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.arange, accessed_by=GetAttrGuardAccessor(arange) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].torch.arange, 140565181040736) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu, accessed_by=GetAttrGuardAccessor(new_gelu) + | | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].new_gelu.__code__, 140561652999328) + | | | +- GuardManager: source=G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F, accessed_by=GetAttrGuardAccessor(F) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: 
G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_nn_dot_modules_dot_sparse'].F is G['__import_torchbenchmark_dot_models_dot_nanogpt_dot_model'].F + +V0806 13:56:01.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "512cbce354a3c2247fd5ce255fa5310b"} + { + "name": "entire_frame_compile", + "ts": 1722977761005735.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "69d31a588f8918001abc5fb068f845d2"} + { + "name": "_compile.compile_inner", + "ts": 1722977761005843.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.006000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "2/0", "frame_key": "3", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 438, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 1713, "shape_env_guard_count": 0, "graph_op_count": 393, "graph_node_count": 543, "graph_input_count": 149, "start_time": 1722977751.3634944, "entire_frame_compile_time_s": 9.642390012741089, "backend_compile_time_s": 8.403583765029907, "inductor_compile_time_s": 4.873299598693848, "code_gen_time_s": 3.313795566558838, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function reduce_to_scalar_loss in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.4969205856323242, "has_guarded_code": true}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.007000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "84f68f158e9e577b45fe78f1b10b1a9c"} + { + "name": "cudagraphify", + "ts": 1722977761007495.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.007000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9693821b0ca26729ddde83adc52af6fb"} + { + "name": "cudagraphify", + "ts": 1722977761007762.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.268000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0d4a8e93cf617b30c0e2c59d80cd0e78"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761268522.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.512000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, 
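The chromium_event payloads above are Chrome Trace Event Format records: a "ph": "B" entry opens a span, a later "ph": "E" entry with the same name closes it, and "ts" is a microseconds-since-epoch timestamp, so the entire_frame_compile pair for frame 2/0 brackets roughly 9.66 s of wall time, in line with entire_frame_compile_time_s in the compilation_metrics record. A minimal sketch of pairing these records, assuming the payload dicts have already been parsed out of the log; the helper names below are illustrative, not tlparse API:

    import json
    from collections import defaultdict

    def durations(events):
        # Pair "B"/"E" phases by event name; "ts" is in microseconds.
        stacks, spans = defaultdict(list), []
        for e in sorted(events, key=lambda e: e["ts"]):
            if e["ph"] == "B":
                stacks[e["name"]].append(e["ts"])
            elif e["ph"] == "E" and stacks[e["name"]]:
                start = stacks[e["name"]].pop()
                spans.append((e["name"], (e["ts"] - start) / 1e6))
        return spans  # e.g. [("entire_frame_compile", 9.658...), ...]

    def to_perfetto(events, path):
        # Chrome trace duration events want a thread id as well; a
        # constant tid is fine for this single-process log. Perfetto's
        # trace viewer loads a bare JSON array of events directly.
        for e in events:
            e.setdefault("tid", 0)
        with open(path, "w") as f:
            json.dump(events, f)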
"has_payload": "0143b142071834aec8ab7d4b288e8a91"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761511918.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.513000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5e2f289c2b278e69c80cc13db919462a"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761513285.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.604000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ae366a49f803f1a405565bf9f2c9283d"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761604478.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.610000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0125a12d93ac7f49266a1f310a5a9705"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761610516.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.703000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9a2e32e482a8b02c1de380e4f7d9c3a0"} + { + "name": "CachingAutotuner.benchmark_all_configs", + "ts": 1722977761703233.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.705000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 426, "name": "compute_loss", "filename": 1}]}, "frame_id": 3, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.705000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a345d7189414418bbe21675f688e5406"} + { + "name": "_compile.compile_inner", + "ts": 1722977761705841.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.705000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "53f1497b31d0e8e868eb5ed513c8edf3"} + { + "name": "entire_frame_compile", + "ts": 1722977761705918.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 109, "size": 201216}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 3, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 1, 50304], "requires_grad": true, "stride": [50304, 50304, 1], "storage": 0, "view_func": "", "describer_id": 109}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 109, "id": 0, "source": "L['pred']"}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.710000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1, "has_payload": 
"0251979d852e8b490120b35388d9750f"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['pred'], accessed_by=DictGetItemGuardAccessor(pred) + | | +- TENSOR_MATCH: check_tensor(L['pred'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1, 1, 50304], stride=[50304, 50304, 1]) + | | +- NO_HASATTR: hasattr(L['pred'], '_dynamo_dynamic_indices') == False + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['reduce_to_scalar_loss'], accessed_by=DictGetItemGuardAccessor(reduce_to_scalar_loss) + | | | +- ID_MATCH: ___check_obj_id(G['reduce_to_scalar_loss'], 140561699517584) + +V0806 13:56:01.710000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e2fbb5cb8efb7e6c931fbcfb93b33000"} + { + "name": "entire_frame_compile", + "ts": 1722977761710834.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.710000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f378dc6ebc87b79696a622fb1503e8c9"} + { + "name": "_compile.compile_inner", + "ts": 1722977761710908.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "3/0", "frame_key": "4", "co_name": "compute_loss", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 426, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 1, "graph_input_count": 1, "start_time": 1722977761.7058077, "entire_frame_compile_time_s": 0.005119800567626953, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function reduce_to_scalar_loss in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.0011796951293945312, "has_guarded_code": true}, "frame_id": 3, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.711000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}]}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5563f1c7c00ed0592ebe0721d8d15aaa"} + { + "name": "_compile.compile_inner", + "ts": 1722977761711605.2, + "args": null, + 
"ph": "B", + "pid": 0 + } +V0806 13:56:01.711000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0d4ebb6c2078eda941d9db2c5c59c5a3"} + { + "name": "entire_frame_compile", + "ts": 1722977761711674.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 110, "size": 4}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 0, "view_func": "", "describer_id": 110}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 110, "id": 0, "source": "L['___stack1']"}, "frame_id": 4, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:01.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 111, "size": 4}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 0, "view_func": "", "describer_id": 111}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 111, "id": 0, "source": "L['___stack1']"}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.720000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1, "has_payload": "88e89e154b64eda379f4f8faa2423abf"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self) + | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624) + | | +- GuardManager: source=L['self'].grad_scaler, accessed_by=GetAttrGuardAccessor(grad_scaler) + | | | +- TYPE_MATCH: ___check_type_id(L['self'].grad_scaler, 94206246390304) + | +- GuardManager: source=L['___stack0'], accessed_by=DictGetItemGuardAccessor(___stack0) + | | +- ID_MATCH: ___check_obj_id(L['___stack0'], 94206129614704) + | +- GuardManager: source=L['___stack1'], accessed_by=DictGetItemGuardAccessor(___stack1) + | | +- TENSOR_MATCH: check_tensor(L['___stack1'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[], stride=[]) + | | +- NO_HASATTR: hasattr(L['___stack1'], '_dynamo_dynamic_indices') == False + +V0806 13:56:01.720000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "779e853a676e6fc1f8bc98d08b7a37e0"} + { + "name": "entire_frame_compile", + "ts": 1722977761720908.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.720000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7007ee210e774dfdfc169a55bde2c2e9"} + { + "name": "_compile.compile_inner", + "ts": 1722977761720981.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:01.721000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "4/0", 
"frame_key": "5", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 444, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 9, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 1, "graph_input_count": 1, "start_time": 1722977761.7115858, "entire_frame_compile_time_s": 0.009430170059204102, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["Tensor.backward"], "dynamo_time_before_restart_s": 0.0031707286834716797, "has_guarded_code": true}, "frame_id": 4, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:01.722000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "18b8fcfb792a459c0a5419875f8b2f45"} + { + "name": "compile_fx..bw_compiler", + "ts": 1722977761722201.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.723000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "dae2747e1c3069c100850462650d8e87"} + { + "name": "compile_fx_inner", + "ts": 1722977761723042.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.723000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c375948250791b18bbfdf36bd1f57da2"} + { + "name": "inductor_compile", + "ts": 1722977761723133.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:01.869000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "0d9dc4e9b2e5066df686c9b12e9da24f"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, 
permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1): + view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97 = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None + mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103 = 
torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + view_148 = torch.ops.aten.view.default(mul_103, [64, 768]) + mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102 = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None + permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None + view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None + mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None + add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0) + mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None + mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112 = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + view_151 = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None + mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106 = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None + permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None + mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = 
None + sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + view_154 = torch.ops.aten.view.default(add_101, [64, 768]) + mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110 = torch.ops.aten.permute.default(view_154, [1, 0]) + permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None + view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None + mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None + permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None + view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134 = _scaled_dot_product_efficient_attention_backward[0] + getitem_135 = _scaled_dot_product_efficient_attention_backward[1] + getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None + permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None + permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None + cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None + mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118 = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None + permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None + permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + 
view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None
+ mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None
+ mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)
+ sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)
+ mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None
+ sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None
+ mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None
+ sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None
+ sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None
+ mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None
+ mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None
+ sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None
+ sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None
+ add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None
+ view_164 = torch.ops.aten.view.default(add_102, [64, 768])
+ mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None
+ permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])
+ mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None
+ permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None
+ sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None
+ view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None
+ permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None
+ view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None
+ view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None
+ mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)
+ mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None
+ pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)
+ mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None
+ add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None
+ mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None
+ tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None
+ add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)
+ mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None
+ mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None
+ sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None
+ mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None
+ mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None
+ mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)
+ pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None
+ mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None
+ mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None
+ add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None
+ mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None
+ add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None
+ view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None
+ mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None
+ permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])
+ mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None
+ permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None
+ sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None
+ view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None
+ permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None
+ view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None
+ mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None
+ mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)
+ sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)
+ mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None
+ sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None
+ mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None
+ sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None
+ sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None
+ mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None
+ mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None
+ sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None
+ sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None
+ add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None
+ view_170 = torch.ops.aten.view.default(add_105, [64, 768])
+ mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None
+ permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])
+ permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])
+ view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None
+ view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None
+ mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None
+ permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None
+ sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None
+ view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None
+ permute_132 = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None
+ view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None
+ view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None
+ permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None
+ _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None
+ getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]
+ getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]
+ getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None
+ permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None
+ view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None
+ permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None
+ view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None
+ permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None
+ view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None
+ cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None
+ view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None
+ mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None
+ permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])
+ mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None
+ permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None
+ sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None
+ view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None
+ permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None
+ view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None
+ mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None
+ mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)
+ sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)
+ mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None
+ sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None
+ mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None
+ sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None
+ sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None
+ mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None
+ mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None
+ sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None
+ sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None
+ add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None
+ view_180 = torch.ops.aten.view.default(add_106, [64, 768])
+ mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None
+ permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])
+ mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None
+ permute_143 = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None
+ sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None
+ view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None
+ permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None
+ view_182 = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None
+ view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None
+ mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)
+ mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None
+ pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)
+ mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None
+ add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None
+ mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None
+ tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None
+ add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)
+ mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None
+ mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None
+ sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None
+ mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None
+ mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None
+ mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)
+ pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None
+ mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None
+ mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None
+ add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None
+ mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None
+ add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None
+ view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None
+ mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None
+ permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])
+ mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None
+ permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None
+ sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None
+ view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None
+ permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None
+ view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None
+ mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None
+ mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)
+ sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)
+ mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None
+ sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None
+ mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None
+ sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None
+ sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None
+ mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None
+ mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None
+ sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None
+ sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None
+ add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None
+ view_186 = torch.ops.aten.view.default(add_109, [64, 768])
+ mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None
+ permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])
+ permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])
+ view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None
+ view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None
+ mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None
+ permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None
+ sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None
+ view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None
+ permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None
+ view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None
+ view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None
+ permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None
+ _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None
+ getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]
+ getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]
+ getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None
+ permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None
+ view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None
+ permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None
+ view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None
+ permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None
+ view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None
+ cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None
+ view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None
+ mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None
+ permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])
+ mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None
+ permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None
+ sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None
+ view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None
+ permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None
+ view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None
+ mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None
+ mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)
+ sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)
+ mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None
+ sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None
+ mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None
+ sub_47 = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None
+ sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None
+ mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None
+ mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None
+ sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None
+ sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None
+ add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None
+ view_196 = torch.ops.aten.view.default(add_110, [64, 768])
+ mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None
+ permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])
+ mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None
+ permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None
+ sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None
+ view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None
+ permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None
+ view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None
+ view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None
+ mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)
+ mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None
+ pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)
+ mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None
+ add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None
+ mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None
+ tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None
+ add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)
+ mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None
+ mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None
+ sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None
+ mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None
+ mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None
+ mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)
+ pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None
+ mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None
+ mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None
+ add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None
+ mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None
+ add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None
+ view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None
+ mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None
+ permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])
+ mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None
+ permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None
+ sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None
+ view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None
+ permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None
+ view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None
+ mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None
+ mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)
+ sum_43 = torch.ops.aten.sum.dim_IntList(mul_184, [2], True)
+ mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None
+ sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None
+ mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None
+ sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None
+ sub_52 = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None
+ mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None
+ mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None
+ sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None
+ sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None
+ add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None
+ view_202 = torch.ops.aten.view.default(add_113, [64, 768])
+ mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None
+ permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])
+ permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])
+ view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None
+ view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None
+ mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None
+ permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None
+ sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None
+ view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None
+ permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None
+ view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None
+ view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None
+ permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None
+ _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None
+ getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]
+ getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]
+ getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None
+ permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None
+ view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None
+ permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None
+ view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None
+ permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None
+ view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None
+ cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None
+ view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None
+ mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None
+ permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])
+ mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None
+ permute_179 = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None
+ sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None
+ view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None
+ permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None
+ view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None
+ mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None
+ mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)
+ sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)
+ mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None
+ sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None
+ mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None
+ sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None
+ sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None
+ mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None
+ mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None
+ sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None
+ sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None
+ add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None
+ view_212 = torch.ops.aten.view.default(add_114, [64, 768])
+ mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None
+ permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])
+ mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None
+ permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None
+ sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None
+ view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None
+ permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None
+ view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None
+ view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None
+ mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)
+ mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None
+ pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)
+ mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None
+ add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None
+ mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None
+ tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None
+ add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)
+ mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None
+ mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None
+ sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None
+ mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None
+ mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None
+ mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)
+ pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None
+ mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None
+ mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None
+ add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None
+ mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None
+ add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None
+ view_215 = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None
+ mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None
+ permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])
+ mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None
+ permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None
+ sum_54 = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None
+ view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None
+ permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None
+ view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None
+ mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None
+ mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)
+ sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)
+ mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None
+ sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None
+ mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None
+ sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None
+ sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None
+ mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None
+ mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None
+ sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None
+ sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None
+ add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None
+ view_218 = torch.ops.aten.view.default(add_117, [64, 768])
+ mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None
+ permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])
+ permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])
+ view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None
+ view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None
+ mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None
+ permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None
+ sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None
+ view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None
+ permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None
+ view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None
+ view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None
+ permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None
+ _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None
+ getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]
+ getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]
+ getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None
+ permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None
+ view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None
+ permute_195 = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None
+ view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None
+ permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None
+ view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None
+ cat_4 = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None
+ view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None
+ mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None
+ permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])
+ mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None
+ permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None
+ sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None
+ view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None
+ permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None
+ view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None
+ mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None
+ mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)
+ sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)
+ mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None
+ sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None
+ mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None
+ sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None
+ sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None
+ mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None
+ mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None
+ sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None
+ sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None
+ add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None
+ view_228 = torch.ops.aten.view.default(add_118, [64, 768])
+ mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None
+ permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])
+ mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None
+ permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None
+ sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None
+ view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None
+ permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None
+ view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None
+ view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None
+ mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)
+ mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None
+ pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)
+ mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None
+ add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None
+ mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None
+ tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None
+ add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)
+ mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None
+ mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None
+ sub_63 = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None
+ mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None
+ mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None
+ mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)
+ pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None
+ mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None
+ mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None
+ add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None
+ mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None
+ add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None
+ view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None
+ mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None
+ permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])
+ mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None
+ permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None
+ sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None
+ view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None
+ permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None
+ view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None
+ mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None
+ mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)
+ sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)
+ mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None
+ sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None
+ mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None
+ sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None
+ sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None
+ mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None
+ mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None
+ sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None
+ sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None
+ add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None
+ view_234 = torch.ops.aten.view.default(add_121, [64, 768])
+ mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None
+ permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])
+ permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])
+ view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None
+ view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None
+ mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None
+ permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None
+ sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None
+ view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None
+ permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None
+ view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None
+ view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None
+ permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None
+ _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None
+ getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]
+ getitem_155 = _scaled_dot_product_efficient_attention_backward_5[1]
+ getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None
+ permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None
+ view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None
+ permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None
+ view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None
+ permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None
+ view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None
+ cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None
+ view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None
+ mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None
+ permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])
+ mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None
+ permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None
+ sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None
+ view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None
+ permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None
+ view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None
+ mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None
+ mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)
+ sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)
+ mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None
+ sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None
+ mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None
+ sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None
+ sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None
+ mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None
+ mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None
+ sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None
+ sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None
+ add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None
+ view_244 = torch.ops.aten.view.default(add_122, [64, 768])
+ mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None
+ permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])
+ mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None
+ permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None
+ sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None
+ view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None
+ permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None
+ view_246 = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None
+ view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None
+ mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)
+ mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None
+ pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)
+ mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None
+ add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None
+ mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None
+ tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None
+ add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)
+ mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None
+ mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None
+ sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None
+ mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None
+ mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None
+ mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)
+ pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None
+ mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None
+ mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None
+ add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None
+ mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None
+ add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None
+ view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None
+ mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None
+ permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])
+ mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None
+ permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None
+ sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None
+ view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None
+ permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None
+ view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None
+ mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None
+ mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)
+ sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)
+ mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None
+ sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None
+ mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None
+ sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None
+ sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None
+ mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None
+ mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None
+ sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None
+ sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None
+ add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None
+ view_250 = torch.ops.aten.view.default(add_125, [64, 768])
+ mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None
+ permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])
+ permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])
+ view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None
+ view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None
+ mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None
+ permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None
+ sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None
+ view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None
+ permute_232 = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None
+ view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None
+ view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None
+ permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None
+ _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None
+ getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]
+ getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]
+ getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None
+ permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None
+ view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None
+ permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None
+ view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None
+ permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None
+ view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None
+ cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None
+ view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None
+ mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None
+ permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])
+ mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None
+ permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None
+ sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None
+ view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None
+ permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None
+ view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None
+ mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None
+ mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)
+ sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)
+ mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None
+ sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None
+ mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None
+ sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None
+ sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None
+ mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None
+ mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None
+ sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None
+ sum_88 = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None
+ add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None
+ view_260 = torch.ops.aten.view.default(add_126, [64, 768])
+ mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None
+ permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])
+ mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None
+ permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None
+ sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None
+ view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None
+ permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None
+ view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None
+ view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None
+ mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)
+ mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None
+ pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)
+ mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None
+ add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None
+ mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None
+ tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None
+ add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)
+ mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None
+ mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None
+ sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None
+ mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None
+ mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None
+ mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)
+ pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None
+ mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None
+ mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None
+ add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None
+ mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None
+ add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None
+ view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None
+ mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None
+ permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])
+ mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None
+ permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None
+ sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None
+ view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None
+ permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None
+ view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None
+ mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None
+ mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)
+ sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)
+ mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None
+ sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None
+ mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None
+ sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None
+ sub_80 = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None
+ mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None
+ mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None
+ sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None
+ sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None
+ add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None
+ view_266 = torch.ops.aten.view.default(add_129, [64, 768])
+ mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None
+ permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])
+ permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])
+ view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None
+ view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None
+ mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None
+ permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None
+ sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None
+ view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None
+ permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None
+ view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None
+ view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None
+ permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None
+ _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None
+ getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]
+ getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]
+ getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None
+ permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None
+ view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None
+ permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None
+ view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None
+ permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None
+ view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None
+ cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None
+ view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None
+ mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None
+ permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])
+ mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None
+ permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None
+ sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None
+ view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None
+ permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None
+ view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None
+ mul_283 = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None
+ mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)
+ sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)
+ mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None
+ sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None
+ mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None
+ sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None
+ sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None
+ mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None
+ mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None
+ sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None
+ sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None
+ add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None
+ view_276 = torch.ops.aten.view.default(add_130, [64, 768])
+ mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None
+ permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])
+ mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None
+ permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None
+ sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None
+ view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None
+ permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None
+ view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None
+ view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None
+ mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)
+ mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None
+ pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)
+ mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None
+ add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None
+ mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None
+ tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None
+ add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)
+ mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None
+ mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None
+ sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None
+ mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None
+ mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None
+ mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)
+ pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None
+ mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None
+ mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None
+ add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None
+ mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None
+ add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None
+ view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None
+ mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None
+ permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])
+ mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None
+ permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None
+ sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None
+ view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None
+ permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None
+ view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None
+ mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None
+ mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)
+ sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)
+ mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None
+ sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None
+ mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None
+ sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None
+ sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None
+ mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None
+ mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None
+ sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None
+ sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None
+ add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None
+ view_282 = torch.ops.aten.view.default(add_133, [64, 768])
+ mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None
+ permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])
+ permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])
+ view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None
+ view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None
+ mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None
+ permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None
+ sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None
+ view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None
+ permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None
+ view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None
+ view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None
+ permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None
+ _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None
+ getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]
+ getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]
+ getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None
+ permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None
+ view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None
+ permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None
+ view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None
+ permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None
+ view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None
+ cat_8 = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None
+ view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None
+ mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None
+ permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])
+ mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None
+ permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None
+ sum_108 = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None
+ view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None
+ permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None
+ view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None
+ mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None
+ mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)
+ sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)
+ mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None
+ sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None
+ mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None
+ sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None
+ sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None
+ mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None
+ mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None
+ sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None
+ sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None
+ add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None
+ view_292 = torch.ops.aten.view.default(add_134, [64, 768])
+ mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None
+ permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])
+ mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None
+ permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None
+ sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None
+ view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None
+ permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None
+ view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None
+ view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None
+ mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)
+ mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None
+ pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)
+ mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None
+ add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None
+ mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None
+ tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None
+ add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)
+ mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None
+ mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None
+ sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None
+ mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None
+ mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None
+ mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)
+ pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None
+ mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None
+ mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None
+ add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None
+ mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None
+ add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None
+ view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None
+ mm_77 = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None
+ permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])
+ mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None
+ permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None
+ sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None
+ view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None
+ permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None
+ view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None
+ mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None
+ mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)
+ sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)
+ mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None
+ sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None
+ mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None
+ sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None
+ sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None
+ mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None
+ mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None
+ sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None
+ sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None
+ add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None
+ view_298 = torch.ops.aten.view.default(add_137, [64, 768])
+ mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None
+ permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])
+ permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])
+ view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None
+ view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None
+ mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None
+ permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None
+ sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None
+ view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None
+ permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None
+ view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None
+ view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None
+ permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None
+ _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None
+ getitem_170 = _scaled_dot_product_efficient_attention_backward_9[0]
+ getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]
+ getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None
+ permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None
+ view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None
+ permute_295 = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None
+ view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None
+ permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None
+ view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None
+ cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None
+ view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None
+ mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None
+ permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])
+ mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None
+ permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None
+ sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None
+ view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None
+ permute_300 =
torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None + mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None + sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None + sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None + sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + view_308 = torch.ops.aten.view.default(add_138, [64, 768]) + mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302 = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None + permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None + view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None + view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None + mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5) + mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None + mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None + add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0) + mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None + sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None + mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None + mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306 = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86 = torch.ops.aten.mm.default(permute_306, view_20); 
permute_306 = view_20 = None + permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None + permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None + mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None + sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None + sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None + mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None + sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + view_314 = torch.ops.aten.view.default(add_141, [64, 768]) + mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310 = torch.ops.aten.permute.default(view_314, [1, 0]) + permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None + view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None + mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None + permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None + view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1] + getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None + permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None + permute_316 = 
torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None + cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None + mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318 = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None + permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None + mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None + sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None + mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None + sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None + sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + view_324 = torch.ops.aten.view.default(add_142, [64, 768]) + mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322 = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None + permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326 = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None + view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None + mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5) + mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None + mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None + add_7 = torch.ops.aten.add.Tensor(tanh, 1.0) + mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None + sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362 = torch.ops.aten.mul.Tensor(mul_361, 
0.7978845608028654); mul_361 = None + mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None + mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + permute_326 = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None + permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None + mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None + sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None + sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None + sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + view_330 = torch.ops.aten.view.default(add_145, [64, 768]) + mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330 = torch.ops.aten.permute.default(view_330, [1, 0]) + permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None + view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None + mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None + permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None + permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None + view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], 
True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None + permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None + permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None + cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None + mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338 = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None + permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None + mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None + sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378 = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None + sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None + sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148 = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + eq = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None + full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None + eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + where_1 = torch.ops.aten.where.self(unsqueeze_2, 
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None + full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None + add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None + return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4) + + def load_args(reader): + buf0 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf0, (1, 64), dtype=torch.int64, is_leaf=True) # primals_1 + buf1 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf1, (768,), is_leaf=True) # primals_4 + buf2 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf2, (768,), is_leaf=True) # primals_10 + buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf3, (768,), is_leaf=True) # primals_16 + buf4 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf4, (768,), is_leaf=True) # primals_22 + buf5 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf5, (768,), is_leaf=True) # primals_28 + buf6 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf6, (768,), is_leaf=True) # primals_34 + buf7 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf7, (768,), is_leaf=True) # primals_40 + buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf8, (768,), is_leaf=True) # primals_46 + buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf9, (768,), is_leaf=True) # primals_52 + buf10 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf10, (768,), 
is_leaf=True) # primals_58 + buf11 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf11, (768,), is_leaf=True) # primals_64 + buf12 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf12, (768,), is_leaf=True) # primals_70 + buf13 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf13, (768,), is_leaf=True) # primals_76 + buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf14, (768,), is_leaf=True) # primals_82 + buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf15, (768,), is_leaf=True) # primals_88 + buf16 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf16, (768,), is_leaf=True) # primals_94 + buf17 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf17, (768,), is_leaf=True) # primals_100 + buf18 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf18, (768,), is_leaf=True) # primals_106 + buf19 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf19, (768,), is_leaf=True) # primals_112 + buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf20, (768,), is_leaf=True) # primals_118 + buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf21, (768,), is_leaf=True) # primals_124 + buf22 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf22, (768,), is_leaf=True) # primals_130 + buf23 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf23, (768,), is_leaf=True) # primals_136 + buf24 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf24, (768,), is_leaf=True) # primals_142 + buf25 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf25, (768,), is_leaf=True) # primals_148 + buf26 = reader.storage(None, 512, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf26, (1, 64), dtype=torch.int64, is_leaf=True) # unsqueeze + buf27 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf27, (1, 64, 768), is_leaf=True) # mul + buf28 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf28, (64, 768), is_leaf=True) # view + buf29 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_1 + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_2 + reader.tensor(buf29, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_3 + buf30 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf30, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_5 + buf31 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf31, (1, 12, 64), is_leaf=True) # getitem_6 + buf32 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf32, (), dtype=torch.int64, is_leaf=True) # getitem_7 + buf33 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf33, (), dtype=torch.int64, is_leaf=True) # getitem_8 + buf34 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + 
reader.tensor(buf34, (1, 64, 768), is_leaf=True) # mul_2 + buf35 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf35, (64, 768), is_leaf=True) # view_8 + buf36 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf36, (64, 3072), is_leaf=True) # addmm_2 + buf37 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf37, (64, 3072), is_leaf=True) # view_10 + buf38 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf38, (1, 64, 768), is_leaf=True) # mul_8 + buf39 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf39, (64, 768), is_leaf=True) # view_12 + buf40 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_9 + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_10 + reader.tensor(buf40, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_11 + buf41 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf41, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_16 + buf42 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf42, (1, 12, 64), is_leaf=True) # getitem_17 + buf43 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf43, (), dtype=torch.int64, is_leaf=True) # getitem_18 + buf44 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf44, (), dtype=torch.int64, is_leaf=True) # getitem_19 + buf45 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf45, (1, 64, 768), is_leaf=True) # mul_10 + buf46 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf46, (64, 768), is_leaf=True) # view_20 + buf47 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf47, (64, 3072), is_leaf=True) # addmm_6 + buf48 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf48, (64, 3072), is_leaf=True) # view_22 + buf49 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf49, (1, 64, 768), is_leaf=True) # mul_16 + buf50 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf50, (64, 768), is_leaf=True) # view_24 + buf51 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_17 + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_18 + reader.tensor(buf51, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_19 + buf52 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf52, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_27 + buf53 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf53, (1, 12, 64), is_leaf=True) # getitem_28 + buf54 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf54, (), dtype=torch.int64, is_leaf=True) # getitem_29 + buf55 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf55, (), 
dtype=torch.int64, is_leaf=True) # getitem_30 + buf56 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf56, (1, 64, 768), is_leaf=True) # mul_18 + buf57 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf57, (64, 768), is_leaf=True) # view_32 + buf58 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf58, (64, 3072), is_leaf=True) # addmm_10 + buf59 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf59, (64, 3072), is_leaf=True) # view_34 + buf60 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf60, (1, 64, 768), is_leaf=True) # mul_24 + buf61 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf61, (64, 768), is_leaf=True) # view_36 + buf62 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_25 + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_26 + reader.tensor(buf62, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_27 + buf63 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf63, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_38 + buf64 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf64, (1, 12, 64), is_leaf=True) # getitem_39 + buf65 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf65, (), dtype=torch.int64, is_leaf=True) # getitem_40 + buf66 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf66, (), dtype=torch.int64, is_leaf=True) # getitem_41 + buf67 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf67, (1, 64, 768), is_leaf=True) # mul_26 + buf68 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf68, (64, 768), is_leaf=True) # view_44 + buf69 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf69, (64, 3072), is_leaf=True) # addmm_14 + buf70 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf70, (64, 3072), is_leaf=True) # view_46 + buf71 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf71, (1, 64, 768), is_leaf=True) # mul_32 + buf72 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf72, (64, 768), is_leaf=True) # view_48 + buf73 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_33 + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_34 + reader.tensor(buf73, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_35 + buf74 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf74, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_49 + buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf75, (1, 12, 64), is_leaf=True) # getitem_50 + buf76 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf76, (), dtype=torch.int64, is_leaf=True) # getitem_51 + 
buf77 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf77, (), dtype=torch.int64, is_leaf=True) # getitem_52 + buf78 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf78, (1, 64, 768), is_leaf=True) # mul_34 + buf79 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf79, (64, 768), is_leaf=True) # view_56 + buf80 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf80, (64, 3072), is_leaf=True) # addmm_18 + buf81 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf81, (64, 3072), is_leaf=True) # view_58 + buf82 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf82, (1, 64, 768), is_leaf=True) # mul_40 + buf83 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf83, (64, 768), is_leaf=True) # view_60 + buf84 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_41 + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_42 + reader.tensor(buf84, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_43 + buf85 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf85, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_60 + buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf86, (1, 12, 64), is_leaf=True) # getitem_61 + buf87 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf87, (), dtype=torch.int64, is_leaf=True) # getitem_62 + buf88 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf88, (), dtype=torch.int64, is_leaf=True) # getitem_63 + buf89 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf89, (1, 64, 768), is_leaf=True) # mul_42 + buf90 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf90, (64, 768), is_leaf=True) # view_68 + buf91 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf91, (64, 3072), is_leaf=True) # addmm_22 + buf92 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf92, (64, 3072), is_leaf=True) # view_70 + buf93 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf93, (1, 64, 768), is_leaf=True) # mul_48 + buf94 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf94, (64, 768), is_leaf=True) # view_72 + buf95 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_49 + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_50 + reader.tensor(buf95, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_51 + buf96 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf96, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_71 + buf97 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf97, (1, 12, 64), is_leaf=True) # getitem_72 + buf98 = reader.storage(None, 8, 
device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf98, (), dtype=torch.int64, is_leaf=True) # getitem_73 + buf99 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf99, (), dtype=torch.int64, is_leaf=True) # getitem_74 + buf100 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf100, (1, 64, 768), is_leaf=True) # mul_50 + buf101 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf101, (64, 768), is_leaf=True) # view_80 + buf102 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf102, (64, 3072), is_leaf=True) # addmm_26 + buf103 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf103, (64, 3072), is_leaf=True) # view_82 + buf104 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf104, (1, 64, 768), is_leaf=True) # mul_56 + buf105 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf105, (64, 768), is_leaf=True) # view_84 + buf106 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_57 + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_58 + reader.tensor(buf106, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_59 + buf107 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf107, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_82 + buf108 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf108, (1, 12, 64), is_leaf=True) # getitem_83 + buf109 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf109, (), dtype=torch.int64, is_leaf=True) # getitem_84 + buf110 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf110, (), dtype=torch.int64, is_leaf=True) # getitem_85 + buf111 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf111, (1, 64, 768), is_leaf=True) # mul_58 + buf112 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf112, (64, 768), is_leaf=True) # view_92 + buf113 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf113, (64, 3072), is_leaf=True) # addmm_30 + buf114 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf114, (64, 3072), is_leaf=True) # view_94 + buf115 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf115, (1, 64, 768), is_leaf=True) # mul_64 + buf116 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf116, (64, 768), is_leaf=True) # view_96 + buf117 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_65 + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_66 + reader.tensor(buf117, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_67 + buf118 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf118, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_93 + buf119 = 
reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf119, (1, 12, 64), is_leaf=True) # getitem_94 + buf120 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf120, (), dtype=torch.int64, is_leaf=True) # getitem_95 + buf121 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf121, (), dtype=torch.int64, is_leaf=True) # getitem_96 + buf122 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf122, (1, 64, 768), is_leaf=True) # mul_66 + buf123 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf123, (64, 768), is_leaf=True) # view_104 + buf124 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf124, (64, 3072), is_leaf=True) # addmm_34 + buf125 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf125, (64, 3072), is_leaf=True) # view_106 + buf126 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf126, (1, 64, 768), is_leaf=True) # mul_72 + buf127 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf127, (64, 768), is_leaf=True) # view_108 + buf128 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_73 + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_74 + reader.tensor(buf128, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_75 + buf129 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf129, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_104 + buf130 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf130, (1, 12, 64), is_leaf=True) # getitem_105 + buf131 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf131, (), dtype=torch.int64, is_leaf=True) # getitem_106 + buf132 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf132, (), dtype=torch.int64, is_leaf=True) # getitem_107 + buf133 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf133, (1, 64, 768), is_leaf=True) # mul_74 + buf134 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf134, (64, 768), is_leaf=True) # view_116 + buf135 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf135, (64, 3072), is_leaf=True) # addmm_38 + buf136 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf136, (64, 3072), is_leaf=True) # view_118 + buf137 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf137, (1, 64, 768), is_leaf=True) # mul_80 + buf138 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf138, (64, 768), is_leaf=True) # view_120 + buf139 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_81 + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_82 + reader.tensor(buf139, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_83 + 
buf140 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf140, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_115 + buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf141, (1, 12, 64), is_leaf=True) # getitem_116 + buf142 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf142, (), dtype=torch.int64, is_leaf=True) # getitem_117 + buf143 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf143, (), dtype=torch.int64, is_leaf=True) # getitem_118 + buf144 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf144, (1, 64, 768), is_leaf=True) # mul_82 + buf145 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf145, (64, 768), is_leaf=True) # view_128 + buf146 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf146, (64, 3072), is_leaf=True) # addmm_42 + buf147 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf147, (64, 3072), is_leaf=True) # view_130 + buf148 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf148, (1, 64, 768), is_leaf=True) # mul_88 + buf149 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf149, (64, 768), is_leaf=True) # view_132 + buf150 = reader.storage(None, 589824, device=device(type='cuda', index=0)) + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=768, is_leaf=True) # permute_89 + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), is_leaf=True) # permute_90 + reader.tensor(buf150, (1, 12, 64, 64), (147456, 64, 2304, 1), storage_offset=1536, is_leaf=True) # permute_91 + buf151 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf151, (1, 12, 64, 64), (49152, 64, 768, 1), is_leaf=True) # getitem_126 + buf152 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf152, (1, 12, 64), is_leaf=True) # getitem_127 + buf153 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf153, (), dtype=torch.int64, is_leaf=True) # getitem_128 + buf154 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf154, (), dtype=torch.int64, is_leaf=True) # getitem_129 + buf155 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf155, (1, 64, 768), is_leaf=True) # mul_90 + buf156 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf156, (64, 768), is_leaf=True) # view_140 + buf157 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf157, (64, 3072), is_leaf=True) # addmm_46 + buf158 = reader.storage(None, 786432, device=device(type='cuda', index=0)) + reader.tensor(buf158, (64, 3072), is_leaf=True) # view_142 + buf159 = reader.storage(None, 196608, device=device(type='cuda', index=0)) + reader.tensor(buf159, (1, 64, 768), is_leaf=True) # mul_96 + buf160 = reader.storage(None, 8, device=device(type='cuda', index=0), dtype_hint=torch.int64) + reader.tensor(buf160, (1,), dtype=torch.int64, is_leaf=True) # full_default + buf161 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf161, (1, 768), is_leaf=True) # view_144 + buf162 = reader.storage(None, 154533888, 
device=device(type='cuda', index=0)) + reader.tensor(buf162, (50304, 768), is_leaf=True) # permute_99 + buf163 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf163, (1, 64, 1), is_leaf=True) # div + buf164 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf164, (768, 3072), is_leaf=True) # permute_101 + buf165 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf165, (3072, 768), is_leaf=True) # permute_105 + buf166 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf166, (1, 64, 1), is_leaf=True) # div_1 + buf167 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf167, (768, 768), is_leaf=True) # permute_109 + buf168 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf168, (2304, 768), is_leaf=True) # permute_117 + buf169 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf169, (1, 64, 1), is_leaf=True) # div_2 + buf170 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf170, (768, 3072), is_leaf=True) # permute_121 + buf171 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf171, (3072, 768), is_leaf=True) # permute_125 + buf172 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf172, (1, 64, 1), is_leaf=True) # div_3 + buf173 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf173, (768, 768), is_leaf=True) # permute_129 + buf174 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf174, (2304, 768), is_leaf=True) # permute_137 + buf175 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf175, (1, 64, 1), is_leaf=True) # div_4 + buf176 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf176, (768, 3072), is_leaf=True) # permute_141 + buf177 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf177, (3072, 768), is_leaf=True) # permute_145 + buf178 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf178, (1, 64, 1), is_leaf=True) # div_5 + buf179 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf179, (768, 768), is_leaf=True) # permute_149 + buf180 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf180, (2304, 768), is_leaf=True) # permute_157 + buf181 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf181, (1, 64, 1), is_leaf=True) # div_6 + buf182 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf182, (768, 3072), is_leaf=True) # permute_161 + buf183 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf183, (3072, 768), is_leaf=True) # permute_165 + buf184 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf184, (1, 64, 1), is_leaf=True) # div_7 + buf185 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf185, (768, 768), is_leaf=True) # permute_169 + buf186 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf186, (2304, 768), is_leaf=True) # permute_177 + buf187 = reader.storage(None, 256, device=device(type='cuda', index=0)) + reader.tensor(buf187, (1, 64, 1), 
is_leaf=True) # div_8
+ buf188 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf188, (768, 3072), is_leaf=True) # permute_181
+ buf189 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf189, (3072, 768), is_leaf=True) # permute_185
+ buf190 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf190, (1, 64, 1), is_leaf=True) # div_9
+ buf191 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf191, (768, 768), is_leaf=True) # permute_189
+ buf192 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf192, (2304, 768), is_leaf=True) # permute_197
+ buf193 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf193, (1, 64, 1), is_leaf=True) # div_10
+ buf194 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf194, (768, 3072), is_leaf=True) # permute_201
+ buf195 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf195, (3072, 768), is_leaf=True) # permute_205
+ buf196 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf196, (1, 64, 1), is_leaf=True) # div_11
+ buf197 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf197, (768, 768), is_leaf=True) # permute_209
+ buf198 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf198, (2304, 768), is_leaf=True) # permute_217
+ buf199 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf199, (1, 64, 1), is_leaf=True) # div_12
+ buf200 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf200, (768, 3072), is_leaf=True) # permute_221
+ buf201 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf201, (3072, 768), is_leaf=True) # permute_225
+ buf202 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf202, (1, 64, 1), is_leaf=True) # div_13
+ buf203 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf203, (768, 768), is_leaf=True) # permute_229
+ buf204 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf204, (2304, 768), is_leaf=True) # permute_237
+ buf205 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf205, (1, 64, 1), is_leaf=True) # div_14
+ buf206 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf206, (768, 3072), is_leaf=True) # permute_241
+ buf207 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf207, (3072, 768), is_leaf=True) # permute_245
+ buf208 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf208, (1, 64, 1), is_leaf=True) # div_15
+ buf209 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf209, (768, 768), is_leaf=True) # permute_249
+ buf210 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf210, (2304, 768), is_leaf=True) # permute_257
+ buf211 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf211, (1, 64, 1), is_leaf=True) # div_16
+ buf212 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf212, (768, 3072), is_leaf=True) # permute_261
+ buf213 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf213, (3072, 768), is_leaf=True) # permute_265
+ buf214 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf214, (1, 64, 1), is_leaf=True) # div_17
+ buf215 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf215, (768, 768), is_leaf=True) # permute_269
+ buf216 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf216, (2304, 768), is_leaf=True) # permute_277
+ buf217 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf217, (1, 64, 1), is_leaf=True) # div_18
+ buf218 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf218, (768, 3072), is_leaf=True) # permute_281
+ buf219 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf219, (3072, 768), is_leaf=True) # permute_285
+ buf220 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf220, (1, 64, 1), is_leaf=True) # div_19
+ buf221 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf221, (768, 768), is_leaf=True) # permute_289
+ buf222 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf222, (2304, 768), is_leaf=True) # permute_297
+ buf223 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf223, (1, 64, 1), is_leaf=True) # div_20
+ buf224 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf224, (768, 3072), is_leaf=True) # permute_301
+ buf225 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf225, (3072, 768), is_leaf=True) # permute_305
+ buf226 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf226, (1, 64, 1), is_leaf=True) # div_21
+ buf227 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf227, (768, 768), is_leaf=True) # permute_309
+ buf228 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf228, (2304, 768), is_leaf=True) # permute_317
+ buf229 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf229, (1, 64, 1), is_leaf=True) # div_22
+ buf230 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf230, (768, 3072), is_leaf=True) # permute_321
+ buf231 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+ reader.tensor(buf231, (3072, 768), is_leaf=True) # permute_325
+ buf232 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf232, (1, 64, 1), is_leaf=True) # div_23
+ buf233 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+ reader.tensor(buf233, (768, 768), is_leaf=True) # permute_329
+ buf234 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+ reader.tensor(buf234, (2304, 768), is_leaf=True) # permute_337
+ buf235 = reader.storage(None, 256, device=device(type='cuda', index=0))
+ reader.tensor(buf235, (1, 64, 1), is_leaf=True) # div_24
+ buf236 = reader.storage(None, 201216, device=device(type='cuda', index=0))
+ reader.tensor(buf236, (1, 1, 50304), is_leaf=True) # tangents_1
+ load_args._version = 0
+ mod = Repro()
+ if __name__ == '__main__':
+ from torch._dynamo.repro.after_aot import run_repro
+ with torch.no_grad():
+ run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
+ # To run it separately, do
+ # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
+ # mod(*args)
+V0806 13:56:02.106000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "4225f8db403a314e55d0f9cb21612c99"}
+ class GraphModule(torch.nn.Module):
+ def forward(self, primals_1: "i64[1, 64][64, 1]cuda:0", primals_4: "f32[768][1]cuda:0", primals_10: "f32[768][1]cuda:0", primals_16: "f32[768][1]cuda:0", primals_22: "f32[768][1]cuda:0", primals_28: "f32[768][1]cuda:0", primals_34: "f32[768][1]cuda:0", primals_40: "f32[768][1]cuda:0", primals_46: "f32[768][1]cuda:0", primals_52: "f32[768][1]cuda:0", primals_58: "f32[768][1]cuda:0", primals_64: "f32[768][1]cuda:0", primals_70: "f32[768][1]cuda:0", primals_76: "f32[768][1]cuda:0", primals_82: "f32[768][1]cuda:0", primals_88: "f32[768][1]cuda:0", primals_94: "f32[768][1]cuda:0", primals_100: "f32[768][1]cuda:0", primals_106: "f32[768][1]cuda:0", primals_112: "f32[768][1]cuda:0", primals_118: "f32[768][1]cuda:0", primals_124: "f32[768][1]cuda:0", primals_130: "f32[768][1]cuda:0", primals_136: "f32[768][1]cuda:0", primals_142: "f32[768][1]cuda:0", primals_148: "f32[768][1]cuda:0", unsqueeze: "i64[1, 64][64, 1]cuda:0", mul: "f32[1, 64, 768][49152, 768, 1]cuda:0", view: "f32[64, 768][768, 1]cuda:0", permute_1: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_2: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_3: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_5: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_6: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_7: "i64[][]cuda:0", getitem_8: "i64[][]cuda:0", mul_2: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_8: "f32[64, 768][768, 1]cuda:0", addmm_2: "f32[64, 3072][3072, 1]cuda:0", view_10: "f32[64, 3072][3072, 1]cuda:0", mul_8: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_12: "f32[64, 768][768, 1]cuda:0", permute_9: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_10: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_11: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_16: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_17: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_18: "i64[][]cuda:0", getitem_19: "i64[][]cuda:0", mul_10: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_20: "f32[64, 768][768, 1]cuda:0", addmm_6: "f32[64, 3072][3072, 1]cuda:0", view_22: "f32[64, 3072][3072, 1]cuda:0", mul_16: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_24: "f32[64, 768][768, 1]cuda:0", permute_17: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_18: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_19: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_27: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_28: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_29: "i64[][]cuda:0", getitem_30: "i64[][]cuda:0", mul_18: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_32: "f32[64, 768][768, 1]cuda:0", addmm_10: "f32[64, 3072][3072, 1]cuda:0", view_34: "f32[64, 3072][3072, 1]cuda:0", mul_24: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_36: "f32[64, 768][768, 1]cuda:0", permute_25: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_26: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_27: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_38: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_39: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_40:
"i64[][]cuda:0", getitem_41: "i64[][]cuda:0", mul_26: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_44: "f32[64, 768][768, 1]cuda:0", addmm_14: "f32[64, 3072][3072, 1]cuda:0", view_46: "f32[64, 3072][3072, 1]cuda:0", mul_32: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_48: "f32[64, 768][768, 1]cuda:0", permute_33: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_34: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_35: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_49: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_50: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_51: "i64[][]cuda:0", getitem_52: "i64[][]cuda:0", mul_34: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_56: "f32[64, 768][768, 1]cuda:0", addmm_18: "f32[64, 3072][3072, 1]cuda:0", view_58: "f32[64, 3072][3072, 1]cuda:0", mul_40: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_60: "f32[64, 768][768, 1]cuda:0", permute_41: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_42: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_43: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_60: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_61: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_62: "i64[][]cuda:0", getitem_63: "i64[][]cuda:0", mul_42: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_68: "f32[64, 768][768, 1]cuda:0", addmm_22: "f32[64, 3072][3072, 1]cuda:0", view_70: "f32[64, 3072][3072, 1]cuda:0", mul_48: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_72: "f32[64, 768][768, 1]cuda:0", permute_49: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_50: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_51: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_71: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_72: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_73: "i64[][]cuda:0", getitem_74: "i64[][]cuda:0", mul_50: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_80: "f32[64, 768][768, 1]cuda:0", addmm_26: "f32[64, 3072][3072, 1]cuda:0", view_82: "f32[64, 3072][3072, 1]cuda:0", mul_56: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_84: "f32[64, 768][768, 1]cuda:0", permute_57: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_58: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_59: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_82: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_83: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_84: "i64[][]cuda:0", getitem_85: "i64[][]cuda:0", mul_58: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_92: "f32[64, 768][768, 1]cuda:0", addmm_30: "f32[64, 3072][3072, 1]cuda:0", view_94: "f32[64, 3072][3072, 1]cuda:0", mul_64: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_96: "f32[64, 768][768, 1]cuda:0", permute_65: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_66: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_67: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_93: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_94: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_95: "i64[][]cuda:0", getitem_96: "i64[][]cuda:0", mul_66: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_104: "f32[64, 768][768, 1]cuda:0", addmm_34: "f32[64, 3072][3072, 1]cuda:0", view_106: "f32[64, 3072][3072, 1]cuda:0", mul_72: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_108: "f32[64, 768][768, 1]cuda:0", permute_73: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_74: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_75: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_104: "f32[1, 12, 64, 64][49152, 
64, 768, 1]cuda:0", getitem_105: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_106: "i64[][]cuda:0", getitem_107: "i64[][]cuda:0", mul_74: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_116: "f32[64, 768][768, 1]cuda:0", addmm_38: "f32[64, 3072][3072, 1]cuda:0", view_118: "f32[64, 3072][3072, 1]cuda:0", mul_80: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_120: "f32[64, 768][768, 1]cuda:0", permute_81: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_82: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_83: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_115: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_116: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_117: "i64[][]cuda:0", getitem_118: "i64[][]cuda:0", mul_82: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_128: "f32[64, 768][768, 1]cuda:0", addmm_42: "f32[64, 3072][3072, 1]cuda:0", view_130: "f32[64, 3072][3072, 1]cuda:0", mul_88: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_132: "f32[64, 768][768, 1]cuda:0", permute_89: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_90: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", permute_91: "f32[1, 12, 64, 64][147456, 64, 2304, 1]cuda:0", getitem_126: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0", getitem_127: "f32[1, 12, 64][768, 64, 1]cuda:0", getitem_128: "i64[][]cuda:0", getitem_129: "i64[][]cuda:0", mul_90: "f32[1, 64, 768][49152, 768, 1]cuda:0", view_140: "f32[64, 768][768, 1]cuda:0", addmm_46: "f32[64, 3072][3072, 1]cuda:0", view_142: "f32[64, 3072][3072, 1]cuda:0", mul_96: "f32[1, 64, 768][49152, 768, 1]cuda:0", full_default: "i64[1][1]cuda:0", view_144: "f32[1, 768][768, 1]cuda:0", permute_99: "f32[50304, 768][768, 1]cuda:0", div: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_101: "f32[768, 3072][3072, 1]cuda:0", permute_105: "f32[3072, 768][768, 1]cuda:0", div_1: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_109: "f32[768, 768][768, 1]cuda:0", permute_117: "f32[2304, 768][768, 1]cuda:0", div_2: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_121: "f32[768, 3072][3072, 1]cuda:0", permute_125: "f32[3072, 768][768, 1]cuda:0", div_3: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_129: "f32[768, 768][768, 1]cuda:0", permute_137: "f32[2304, 768][768, 1]cuda:0", div_4: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_141: "f32[768, 3072][3072, 1]cuda:0", permute_145: "f32[3072, 768][768, 1]cuda:0", div_5: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_149: "f32[768, 768][768, 1]cuda:0", permute_157: "f32[2304, 768][768, 1]cuda:0", div_6: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_161: "f32[768, 3072][3072, 1]cuda:0", permute_165: "f32[3072, 768][768, 1]cuda:0", div_7: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_169: "f32[768, 768][768, 1]cuda:0", permute_177: "f32[2304, 768][768, 1]cuda:0", div_8: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_181: "f32[768, 3072][3072, 1]cuda:0", permute_185: "f32[3072, 768][768, 1]cuda:0", div_9: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_189: "f32[768, 768][768, 1]cuda:0", permute_197: "f32[2304, 768][768, 1]cuda:0", div_10: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_201: "f32[768, 3072][3072, 1]cuda:0", permute_205: "f32[3072, 768][768, 1]cuda:0", div_11: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_209: "f32[768, 768][768, 1]cuda:0", permute_217: "f32[2304, 768][768, 1]cuda:0", div_12: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_221: "f32[768, 3072][3072, 1]cuda:0", permute_225: "f32[3072, 768][768, 1]cuda:0", div_13: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_229: "f32[768, 768][768, 1]cuda:0", permute_237: "f32[2304, 768][768, 1]cuda:0", div_14: "f32[1, 64, 1][64, 1, 1]cuda:0", 
permute_241: "f32[768, 3072][3072, 1]cuda:0", permute_245: "f32[3072, 768][768, 1]cuda:0", div_15: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_249: "f32[768, 768][768, 1]cuda:0", permute_257: "f32[2304, 768][768, 1]cuda:0", div_16: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_261: "f32[768, 3072][3072, 1]cuda:0", permute_265: "f32[3072, 768][768, 1]cuda:0", div_17: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_269: "f32[768, 768][768, 1]cuda:0", permute_277: "f32[2304, 768][768, 1]cuda:0", div_18: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_281: "f32[768, 3072][3072, 1]cuda:0", permute_285: "f32[3072, 768][768, 1]cuda:0", div_19: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_289: "f32[768, 768][768, 1]cuda:0", permute_297: "f32[2304, 768][768, 1]cuda:0", div_20: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_301: "f32[768, 3072][3072, 1]cuda:0", permute_305: "f32[3072, 768][768, 1]cuda:0", div_21: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_309: "f32[768, 768][768, 1]cuda:0", permute_317: "f32[2304, 768][768, 1]cuda:0", div_22: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_321: "f32[768, 3072][3072, 1]cuda:0", permute_325: "f32[3072, 768][768, 1]cuda:0", div_23: "f32[1, 64, 1][64, 1, 1]cuda:0", permute_329: "f32[768, 768][768, 1]cuda:0", permute_337: "f32[2304, 768][768, 1]cuda:0", div_24: "f32[1, 64, 1][64, 1, 1]cuda:0", tangents_1: "f32[1, 1, 50304][50304, 50304, 1]cuda:0"): + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:238 in forward, code: logits = self.lm_head( + view_146: "f32[1, 50304][50304, 1]cuda:0" = torch.ops.aten.reshape.default(tangents_1, [1, 50304]); tangents_1 = None + permute_97: "f32[50304, 1][1, 50304]cuda:0" = torch.ops.aten.permute.default(view_146, [1, 0]) + mm_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None + permute_98: "f32[768, 50304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None + mm_2: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None + view_147: "f32[1, 1, 768][768, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_2, [1, 1, 768]); mm_2 = None + permute_100: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:239 in forward, code: x[:, [-1], :] + full_default_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + index_put: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.index_put_.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_99: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None + mul_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, 768) + sum_1: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_99, [2], True) + mul_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None + sum_2: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None + mul_102: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None + sub_26: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None + sub_27: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None + mul_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None + mul_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None + sum_3: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None + sum_4: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_148: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(mul_103, [64, 768]) + mm_3: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None + permute_102: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_148, [1, 0]) + mm_4: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None + permute_103: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None + sum_5: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None + view_149: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_5, [768]); sum_5 = None + permute_104: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None + view_150: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_3, [1, 64, 3072]); mm_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_141: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_46, [1, 64, 3072]); addmm_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_92: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_141, 0.5) + mul_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0) + mul_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None + add_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None + mul_94: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None + tanh_11: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_94); mul_94 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_95: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_11, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_106: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_150, add_95); 
view_150 = add_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None + sub_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None + mul_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None + mul_109: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None + mul_110: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_109, 0.044715) + pow_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None + mul_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None + mul_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_99: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_113: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_100: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_151: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_100, [64, 3072]); add_100 = None + mm_5: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None + permute_106: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_151, [1, 0]) + mm_6: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None + permute_107: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None + sum_6: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None + view_152: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_6, [3072]); sum_6 = None + permute_108: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None + view_153: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_5, [1, 64, 768]); mm_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_115: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None + mul_116: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_115, 768) + sum_7: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_115, [2], True) + mul_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None + sum_8: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None + mul_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None + sub_30: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None + sub_31: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None + mul_119: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None + mul_120: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None + sum_9: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None + sum_10: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_154: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_101, [64, 768]) + mm_7: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None + permute_110: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_154, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_92: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3]) + view_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_92, [1, 64, 768]); permute_92 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_138: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_137, [64, 768]); view_137 = None + mm_8: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None + permute_111: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None + sum_11: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None + view_155: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_11, [768]); sum_11 = None + permute_112: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None + view_156: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_7, [1, 64, 768]); mm_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_157: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_156, [1, 64, 12, 64]); view_156 = None + permute_113: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = 
torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None + getitem_134: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[0] + getitem_135: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[1] + getitem_136: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_114: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None + view_158: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_114, [1, 64, 768]); permute_114 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_115: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None + view_159: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_115, [1, 64, 768]); permute_115 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_116: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None + view_160: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_116, [1, 64, 768]); permute_116 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None + view_161: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat, [64, 2304]); cat = None + mm_9: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None + permute_118: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_161, [1, 0]) + mm_10: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None + permute_119: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None + sum_12: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None + view_162: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_12, [2304]); sum_12 = None + permute_120: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None + view_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_9, [1, 64, 768]); mm_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return 
F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None + mul_123: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, 768) + sum_13: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_122, [2], True) + mul_124: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None + sum_14: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None + mul_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None + sub_33: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None + sub_34: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None + mul_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None + mul_127: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None + sum_15: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None + sum_16: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_102: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_164: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_102, [64, 768]) + mm_11: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None + permute_122: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_164, [1, 0]) + mm_12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None + permute_123: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None + sum_17: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None + view_165: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_17, [768]); sum_17 = None + permute_124: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None + view_166: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_11, [1, 64, 3072]); mm_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_42, [1, 64, 3072]); addmm_42 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_129, 0.5) + mul_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_11: "f32[1, 64, 
3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0) + mul_85: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None + add_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None + mul_86: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None + tanh_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_86); mul_86 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_87: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_10, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_129: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_130: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None + sub_35: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None + mul_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None + mul_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None + mul_133: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_132, 0.044715) + pow_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None + mul_134: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None + mul_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_103: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_104: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_167: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_104, [64, 3072]); add_104 = None + mm_13: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None + permute_126: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_167, [1, 0]) + mm_14: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None + permute_127: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None + sum_18: "f32[1, 3072][3072, 
1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None + view_168: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_18, [3072]); sum_18 = None + permute_128: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None + view_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_13, [1, 64, 768]); mm_13 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None + mul_139: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, 768) + sum_19: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_138, [2], True) + mul_140: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None + sum_20: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None + mul_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None + sub_37: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None + sub_38: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None + mul_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None + mul_143: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None + sum_21: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None + sum_22: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_105: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_170: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_105, [64, 768]) + mm_15: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None + permute_130: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_170, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_84: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3]) + view_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_84, [1, 64, 768]); permute_84 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_126: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_125, [64, 768]); view_125 = None + mm_16: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None + permute_131: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None + sum_23: "f32[1, 
768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None + view_171: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_23, [768]); sum_23 = None + permute_132: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None + view_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_15, [1, 64, 768]); mm_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_173: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_172, [1, 64, 12, 64]); view_172 = None + permute_133: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None + getitem_138: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[0] + getitem_139: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[1] + getitem_140: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_134: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None + view_174: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_134, [1, 64, 768]); permute_134 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_135: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None + view_175: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_135, [1, 64, 768]); permute_135 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_136: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None + view_176: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_136, [1, 64, 768]); permute_136 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_1: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None + view_177: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_1, [64, 2304]); cat_1 = None + mm_17: "f32[64, 
768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None + permute_138: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_177, [1, 0]) + mm_18: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None + permute_139: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None + sum_24: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None + view_178: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_24, [2304]); sum_24 = None + permute_140: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None + view_179: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_17, [1, 64, 768]); mm_17 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None + mul_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, 768) + sum_25: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_145, [2], True) + mul_147: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None + sum_26: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None + mul_148: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None + sub_40: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None + sub_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None + mul_149: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None + mul_150: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None + sum_27: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None + sum_28: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_106: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_180: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_106, [64, 768]) + mm_19: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None + permute_142: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_180, [1, 0]) + mm_20: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None + permute_143: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None + sum_29: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None + view_181: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_29, [768]); sum_29 = None + permute_144: "f32[768, 3072][3072, 
1]cuda:0" = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None + view_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_19, [1, 64, 3072]); mm_19 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_117: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_38, [1, 64, 3072]); addmm_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_76: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_117, 0.5) + mul_151: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_10: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0) + mul_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None + add_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None + mul_78: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None + tanh_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_78); mul_78 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_79: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_9, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_152: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_153: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None + sub_42: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None + mul_154: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None + mul_155: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None + mul_156: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_155, 0.044715) + pow_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None + mul_157: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None + mul_158: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_107: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_159: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_108: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_183: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_108, [64, 3072]); add_108 = None + mm_21: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None + permute_146: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_183, [1, 0]) + mm_22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None + permute_147: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None + sum_30: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None + view_184: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_30, [3072]); sum_30 = None + permute_148: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None + view_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_21, [1, 64, 768]); mm_21 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_161: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None + mul_162: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, 768) + sum_31: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_161, [2], True) + mul_163: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None + sum_32: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None + mul_164: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None + sub_44: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None + sub_45: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None + mul_165: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None + mul_166: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None + sum_33: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None + sum_34: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_109: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_186: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_109, [64, 768]) + mm_23: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None + 
permute_150: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_186, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_76: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3]) + view_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_76, [1, 64, 768]); permute_76 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_114: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_113, [64, 768]); view_113 = None + mm_24: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None + permute_151: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None + sum_35: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None + view_187: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_35, [768]); sum_35 = None + permute_152: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None + view_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_23, [1, 64, 768]); mm_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_189: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_188, [1, 64, 12, 64]); view_188 = None + permute_153: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None + getitem_142: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[0] + getitem_143: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[1] + getitem_144: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_154: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None + view_190: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_154, [1, 64, 768]); permute_154 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_155: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None + 
view_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_155, [1, 64, 768]); permute_155 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_156: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None + view_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_156, [1, 64, 768]); permute_156 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_2: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None + view_193: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_2, [64, 2304]); cat_2 = None + mm_25: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None + permute_158: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_193, [1, 0]) + mm_26: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None + permute_159: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None + sum_36: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None + view_194: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_36, [2304]); sum_36 = None + permute_160: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None + view_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_25, [1, 64, 768]); mm_25 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_168: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None + mul_169: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, 768) + sum_37: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_168, [2], True) + mul_170: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None + sum_38: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None + mul_171: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None + sub_47: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None + sub_48: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None + mul_172: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None + mul_173: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None + sum_39: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None + sum_40: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_110: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_196: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_110, [64, 768]) + mm_27: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None + permute_162: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_196, [1, 0]) + mm_28: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None + permute_163: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None + sum_41: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None + view_197: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_41, [768]); sum_41 = None + permute_164: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None + view_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_27, [1, 64, 3072]); mm_27 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_34, [1, 64, 3072]); addmm_34 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_68: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_105, 0.5) + mul_174: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0) + mul_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None + add_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None + mul_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None + tanh_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_70); mul_70 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_71: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_8, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_175: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_176: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None + sub_49: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None + mul_177: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None + mul_178: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = 
torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None + mul_179: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_178, 0.044715) + pow_16: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None + mul_180: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None + mul_181: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_111: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_182: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_112: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_199: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_112, [64, 3072]); add_112 = None + mm_29: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None + permute_166: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_199, [1, 0]) + mm_30: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None + permute_167: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None + sum_42: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None + view_200: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_42, [3072]); sum_42 = None + permute_168: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None + view_201: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_29, [1, 64, 768]); mm_29 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_184: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None + mul_185: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, 768) + sum_43: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_184, [2], True) + mul_186: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None + sum_44: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None + mul_187: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None + sub_51: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None + sub_52: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None + mul_188: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_7, 
sub_52); div_7 = sub_52 = None + mul_189: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None + sum_45: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None + sum_46: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_113: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_202: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_113, [64, 768]) + mm_31: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None + permute_170: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_202, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_68: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3]) + view_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_68, [1, 64, 768]); permute_68 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_102: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_101, [64, 768]); view_101 = None + mm_32: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None + permute_171: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None + sum_47: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None + view_203: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_47, [768]); sum_47 = None + permute_172: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None + view_204: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_31, [1, 64, 768]); mm_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_205: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_204, [1, 64, 12, 64]); view_204 = None + permute_173: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None + getitem_146: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[0] + getitem_147: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_3[1] + getitem_148: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_174: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None + view_206: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_174, [1, 64, 768]); permute_174 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_175: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None + view_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_175, [1, 64, 768]); permute_175 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_176: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None + view_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_176, [1, 64, 768]); permute_176 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_3: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None + view_209: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_3, [64, 2304]); cat_3 = None + mm_33: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None + permute_178: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_209, [1, 0]) + mm_34: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None + permute_179: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None + sum_48: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None + view_210: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_48, [2304]); sum_48 = None + permute_180: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None + view_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_33, [1, 64, 768]); mm_33 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_191: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None + mul_192: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, 768) + sum_49: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_191, [2], True) + mul_193: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None + sum_50: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None 
+ mul_194: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None + sub_54: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None + sub_55: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None + mul_195: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None + mul_196: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None + sum_51: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None + sum_52: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_114: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_212: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_114, [64, 768]) + mm_35: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None + permute_182: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_212, [1, 0]) + mm_36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None + permute_183: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None + sum_53: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None + view_213: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_53, [768]); sum_53 = None + permute_184: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None + view_214: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_35, [1, 64, 3072]); mm_35 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_93: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_30, [1, 64, 3072]); addmm_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_60: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_93, 0.5) + mul_197: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_8: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0) + mul_61: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None + add_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None + mul_62: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None + tanh_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_62); mul_62 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_7, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_198: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_199: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None + sub_56: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None + mul_200: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None + mul_201: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None + mul_202: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_201, 0.044715) + pow_17: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None + mul_203: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None + mul_204: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_115: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_205: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_116: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_215: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_116, [64, 3072]); add_116 = None + mm_37: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None + permute_186: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_215, [1, 0]) + mm_38: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None + permute_187: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None + sum_54: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None + view_216: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_54, [3072]); sum_54 = None + permute_188: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None + view_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_37, [1, 64, 768]); mm_37 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + mul_207: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None + mul_208: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, 768) + sum_55: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_207, [2], True) + mul_209: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None + sum_56: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None + mul_210: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None + sub_58: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None + sub_59: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None + mul_211: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None + mul_212: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None + sum_57: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None + sum_58: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_117: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_218: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_117, [64, 768]) + mm_39: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None + permute_190: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_218, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_60: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3]) + view_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_60, [1, 64, 768]); permute_60 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_90: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_89, [64, 768]); view_89 = None + mm_40: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None + permute_191: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None + sum_59: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None + view_219: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_59, [768]); sum_59 = None + permute_192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None + view_220: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_39, [1, 64, 768]); mm_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 
2).contiguous().view(B, T, C) + view_221: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_220, [1, 64, 12, 64]); view_220 = None + permute_193: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None + getitem_150: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[0] + getitem_151: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[1] + getitem_152: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_194: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None + view_222: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_194, [1, 64, 768]); permute_194 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_195: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None + view_223: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_195, [1, 64, 768]); permute_195 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_196: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None + view_224: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_196, [1, 64, 768]); permute_196 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_4: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None + view_225: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_4, [64, 2304]); cat_4 = None + mm_41: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None + permute_198: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_225, [1, 0]) + mm_42: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None + permute_199: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None + sum_60: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None + view_226: 
"f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_60, [2304]); sum_60 = None + permute_200: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None + view_227: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_41, [1, 64, 768]); mm_41 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_214: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None + mul_215: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, 768) + sum_61: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_214, [2], True) + mul_216: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None + sum_62: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None + mul_217: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None + sub_61: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None + sub_62: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None + mul_218: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None + mul_219: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None + sum_63: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None + sum_64: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_118: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_228: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_118, [64, 768]) + mm_43: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None + permute_202: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_228, [1, 0]) + mm_44: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None + permute_203: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None + sum_65: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None + view_229: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_65, [768]); sum_65 = None + permute_204: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None + view_230: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_43, [1, 64, 3072]); mm_43 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_81: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_26, [1, 64, 3072]); addmm_26 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in 
new_gelu, code: 0.5 + mul_52: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_81, 0.5) + mul_220: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0) + mul_53: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None + add_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None + mul_54: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None + tanh_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_54); mul_54 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_55: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_6, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_221: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_222: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None + sub_63: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None + mul_223: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None + mul_224: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None + mul_225: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_224, 0.044715) + pow_18: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None + mul_226: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None + mul_227: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_119: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_228: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_120: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_231: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_120, [64, 3072]); add_120 = None + mm_45: "f32[64, 768][768, 1]cuda:0" = 
torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None + permute_206: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_231, [1, 0]) + mm_46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None + permute_207: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None + sum_66: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None + view_232: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_66, [3072]); sum_66 = None + permute_208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None + view_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_45, [1, 64, 768]); mm_45 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_230: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None + mul_231: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, 768) + sum_67: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_230, [2], True) + mul_232: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None + sum_68: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None + mul_233: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None + sub_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None + sub_66: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None + mul_234: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None + mul_235: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None + sum_69: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None + sum_70: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_121: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_234: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_121, [64, 768]) + mm_47: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None + permute_210: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_234, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_52: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3]) + view_77: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_52, [1, 64, 768]); permute_52 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_78: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_77, [64, 768]); view_77 = None + mm_48: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None + permute_211: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None + sum_71: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None + view_235: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_71, [768]); sum_71 = None + permute_212: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None + view_236: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_47, [1, 64, 768]); mm_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_237: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_236, [1, 64, 12, 64]); view_236 = None + permute_213: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None + getitem_154: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[0] + getitem_155: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[1] + getitem_156: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_214: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None + view_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_214, [1, 64, 768]); permute_214 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_215: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None + view_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_215, [1, 64, 768]); permute_215 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_216: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None + view_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.reshape.default(permute_216, [1, 64, 768]); permute_216 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_5: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None + view_241: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_5, [64, 2304]); cat_5 = None + mm_49: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None + permute_218: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_241, [1, 0]) + mm_50: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None + permute_219: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None + sum_72: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None + view_242: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_72, [2304]); sum_72 = None + permute_220: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None + view_243: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_49, [1, 64, 768]); mm_49 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_237: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None + mul_238: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, 768) + sum_73: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_237, [2], True) + mul_239: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None + sum_74: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None + mul_240: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None + sub_68: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None + sub_69: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None + mul_241: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None + mul_242: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None + sum_75: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None + sum_76: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_122: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_244: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_122, [64, 768]) + mm_51: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None + permute_222: "f32[768, 64][1, 768]cuda:0" = 
torch.ops.aten.permute.default(view_244, [1, 0]) + mm_52: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None + permute_223: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None + sum_77: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None + view_245: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_77, [768]); sum_77 = None + permute_224: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None + view_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_51, [1, 64, 3072]); mm_51 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_69: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_22, [1, 64, 3072]); addmm_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_44: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_69, 0.5) + mul_243: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0) + mul_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None + add_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None + mul_46: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None + tanh_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_46); mul_46 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_47: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_5, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_244: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_245: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None + sub_70: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None + mul_246: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None + mul_247: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None + mul_248: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_247, 0.044715) + pow_19: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None + mul_249: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None + mul_250: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_248, mul_249); 
mul_248 = mul_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_123: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_251: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_124: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_247: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_124, [64, 3072]); add_124 = None + mm_53: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None + permute_226: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_247, [1, 0]) + mm_54: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None + permute_227: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None + sum_78: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None + view_248: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_78, [3072]); sum_78 = None + permute_228: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None + view_249: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_53, [1, 64, 768]); mm_53 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_253: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None + mul_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, 768) + sum_79: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_253, [2], True) + mul_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None + sum_80: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None + mul_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None + sub_72: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None + sub_73: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None + mul_257: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None + mul_258: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None + sum_81: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None + sum_82: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, 
self.bias, 1e-5) + add_125: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_250: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_125, [64, 768]) + mm_55: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None + permute_230: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_250, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_44: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3]) + view_65: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_44, [1, 64, 768]); permute_44 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_66: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_65, [64, 768]); view_65 = None + mm_56: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None + permute_231: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None + sum_83: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None + view_251: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_83, [768]); sum_83 = None + permute_232: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None + view_252: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_55, [1, 64, 768]); mm_55 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_253: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_252, [1, 64, 12, 64]); view_252 = None + permute_233: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None + getitem_158: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[0] + getitem_159: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[1] + getitem_160: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_234: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_160, [0, 2, 
1, 3]); getitem_160 = None + view_254: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_234, [1, 64, 768]); permute_234 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_235: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None + view_255: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_235, [1, 64, 768]); permute_235 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_236: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None + view_256: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_236, [1, 64, 768]); permute_236 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_6: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None + view_257: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_6, [64, 2304]); cat_6 = None + mm_57: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None + permute_238: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_257, [1, 0]) + mm_58: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None + permute_239: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None + sum_84: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None + view_258: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_84, [2304]); sum_84 = None + permute_240: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None + view_259: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_57, [1, 64, 768]); mm_57 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_260: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None + mul_261: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, 768) + sum_85: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_260, [2], True) + mul_262: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None + sum_86: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None + mul_263: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None + sub_75: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None + sub_76: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None + mul_264: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None + mul_265: "f32[1, 64, 768][49152, 768, 
1]cuda:0" = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None + sum_87: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None + sum_88: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_126: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_260: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_126, [64, 768]) + mm_59: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None + permute_242: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_260, [1, 0]) + mm_60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None + permute_243: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None + sum_89: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None + view_261: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_89, [768]); sum_89 = None + permute_244: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None + view_262: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_59, [1, 64, 3072]); mm_59 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_57: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_18, [1, 64, 3072]); addmm_18 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_36: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_57, 0.5) + mul_266: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0) + mul_37: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None + add_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None + mul_38: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None + tanh_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_38); mul_38 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_39: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_4, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_267: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / 
math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_268: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None + sub_77: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None + mul_269: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None + mul_270: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None + mul_271: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_270, 0.044715) + pow_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None + mul_272: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None + mul_273: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_127: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_274: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_128: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_263: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_128, [64, 3072]); add_128 = None + mm_61: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None + permute_246: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_263, [1, 0]) + mm_62: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None + permute_247: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None + sum_90: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None + view_264: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_90, [3072]); sum_90 = None + permute_248: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None + view_265: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_61, [1, 64, 768]); mm_61 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_276: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None + mul_277: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, 768) + sum_91: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_276, [2], True) + mul_278: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None + sum_92: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 
= None + mul_279: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None + sub_79: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None + sub_80: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None + mul_280: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None + mul_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None + sum_93: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None + sum_94: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_129: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_266: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_129, [64, 768]) + mm_63: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None + permute_250: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_266, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_36: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3]) + view_53: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_36, [1, 64, 768]); permute_36 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_54: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_53, [64, 768]); view_53 = None + mm_64: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None + permute_251: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None + sum_95: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None + view_267: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_95, [768]); sum_95 = None + permute_252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None + view_268: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_63, [1, 64, 768]); mm_63 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_269: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_268, [1, 64, 12, 64]); view_268 = None + permute_253: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_7 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None + getitem_162: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[0] + getitem_163: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[1] + getitem_164: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_254: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None + view_270: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_254, [1, 64, 768]); permute_254 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_255: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None + view_271: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_255, [1, 64, 768]); permute_255 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_256: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None + view_272: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_256, [1, 64, 768]); permute_256 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_7: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None + view_273: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_7, [64, 2304]); cat_7 = None + mm_65: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None + permute_258: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_273, [1, 0]) + mm_66: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None + permute_259: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None + sum_96: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None + view_274: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_96, [2304]); sum_96 = None + permute_260: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None + view_275: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_65, [1, 64, 768]); mm_65 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_283: "f32[1, 64, 
768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None + mul_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, 768) + sum_97: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_283, [2], True) + mul_285: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None + sum_98: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None + mul_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None + sub_82: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None + sub_83: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None + mul_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None + mul_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None + sum_99: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None + sum_100: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_130: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_276: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_130, [64, 768]) + mm_67: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None + permute_262: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_276, [1, 0]) + mm_68: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None + permute_263: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None + sum_101: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None + view_277: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_101, [768]); sum_101 = None + permute_264: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None + view_278: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_67, [1, 64, 3072]); mm_67 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_45: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_14, [1, 64, 3072]); addmm_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_28: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_45, 0.5) + mul_289: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0) + mul_29: 
"f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None + add_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None + mul_30: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None + tanh_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_30); mul_30 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_31: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_3, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_290: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_291: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None + sub_84: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None + mul_292: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None + mul_293: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None + mul_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_293, 0.044715) + pow_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None + mul_295: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None + mul_296: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_131: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_297: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_132: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_279: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_132, [64, 3072]); add_132 = None + mm_69: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None + permute_266: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_279, [1, 0]) + mm_70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None + permute_267: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None + sum_102: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None + view_280: 
"f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_102, [3072]); sum_102 = None + permute_268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None + view_281: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_69, [1, 64, 768]); mm_69 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_299: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None + mul_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, 768) + sum_103: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_299, [2], True) + mul_301: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None + sum_104: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None + mul_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None + sub_86: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None + sub_87: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None + mul_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None + mul_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None + sum_105: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None + sum_106: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_133: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_282: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_133, [64, 768]) + mm_71: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None + permute_270: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_282, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_28: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3]) + view_41: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_28, [1, 64, 768]); permute_28 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_42: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_41, [64, 768]); view_41 = None + mm_72: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None + permute_271: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None + sum_107: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None + 
view_283: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_107, [768]); sum_107 = None + permute_272: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None + view_284: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_71, [1, 64, 768]); mm_71 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_285: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_284, [1, 64, 12, 64]); view_284 = None + permute_273: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None + getitem_166: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[0] + getitem_167: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[1] + getitem_168: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_274: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None + view_286: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_274, [1, 64, 768]); permute_274 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_275: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None + view_287: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_275, [1, 64, 768]); permute_275 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_276: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None + view_288: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_276, [1, 64, 768]); permute_276 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_8: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None + view_289: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_8, [64, 2304]); cat_8 = None + mm_73: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None + 
permute_278: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_289, [1, 0]) + mm_74: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None + permute_279: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None + sum_108: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None + view_290: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_108, [2304]); sum_108 = None + permute_280: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None + view_291: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_73, [1, 64, 768]); mm_73 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_306: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None + mul_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, 768) + sum_109: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_306, [2], True) + mul_308: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None + sum_110: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None + mul_309: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None + sub_89: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None + sub_90: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None + mul_310: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None + mul_311: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None + sum_111: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None + sum_112: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_134: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_292: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_134, [64, 768]) + mm_75: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None + permute_282: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_292, [1, 0]) + mm_76: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None + permute_283: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None + sum_113: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None + view_293: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_113, [768]); sum_113 = None + permute_284: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None + 
view_294: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_75, [1, 64, 3072]); mm_75 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_33: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_10, [1, 64, 3072]); addmm_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_20: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_33, 0.5) + mul_312: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_3: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0) + mul_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None + add_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None + mul_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None + tanh_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_22); mul_22 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_2, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_313: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_314: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None + sub_91: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None + mul_315: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None + mul_316: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None + mul_317: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_316, 0.044715) + pow_22: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None + mul_318: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None + mul_319: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_135: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_320: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_136: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_295: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_136, [64, 3072]); add_136 = None + mm_77: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None + permute_286: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_295, [1, 0]) + mm_78: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None + permute_287: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None + sum_114: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None + view_296: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_114, [3072]); sum_114 = None + permute_288: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None + view_297: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_77, [1, 64, 768]); mm_77 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_322: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None + mul_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, 768) + sum_115: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_322, [2], True) + mul_324: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None + sum_116: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None + mul_325: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None + sub_93: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None + sub_94: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None + mul_326: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None + mul_327: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None + sum_117: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None + sum_118: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_137: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_298: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_137, [64, 768]) + mm_79: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None + permute_290: "f32[768, 64][1, 768]cuda:0" = 
torch.ops.aten.permute.default(view_298, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_20: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3]) + view_29: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_20, [1, 64, 768]); permute_20 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_30: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_29, [64, 768]); view_29 = None + mm_80: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None + permute_291: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None + sum_119: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None + view_299: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_119, [768]); sum_119 = None + permute_292: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None + view_300: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_79, [1, 64, 768]); mm_79 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_301: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_300, [1, 64, 12, 64]); view_300 = None + permute_293: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None + getitem_170: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[0] + getitem_171: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[1] + getitem_172: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_294: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None + view_302: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_294, [1, 64, 768]); permute_294 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_295: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None + view_303: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.reshape.default(permute_295, [1, 64, 768]); permute_295 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_296: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None + view_304: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_296, [1, 64, 768]); permute_296 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_9: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None + view_305: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_9, [64, 2304]); cat_9 = None + mm_81: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None + permute_298: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_305, [1, 0]) + mm_82: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None + permute_299: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None + sum_120: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None + view_306: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_120, [2304]); sum_120 = None + permute_300: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None + view_307: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_81, [1, 64, 768]); mm_81 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None + mul_330: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, 768) + sum_121: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_329, [2], True) + mul_331: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None + sum_122: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None + mul_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None + sub_96: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None + sub_97: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None + mul_333: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None + mul_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None + sum_123: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None + sum_124: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_138: "f32[1, 64, 768][49152, 768, 1]cuda:0" = 
torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_308: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_138, [64, 768]) + mm_83: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None + permute_302: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_308, [1, 0]) + mm_84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None + permute_303: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None + sum_125: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None + view_309: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_125, [768]); sum_125 = None + permute_304: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None + view_310: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_83, [1, 64, 3072]); mm_83 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_21: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_6, [1, 64, 3072]); addmm_6 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_12: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_21, 0.5) + mul_335: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_2: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0) + mul_13: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None + add_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None + mul_14: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None + tanh_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_14); mul_14 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_15: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh_1, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_336: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_337: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None + sub_98: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None + mul_338: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None + mul_339: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_338, 
0.7978845608028654); mul_338 = None + mul_340: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_339, 0.044715) + pow_23: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None + mul_341: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None + mul_342: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_139: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_343: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_140: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_311: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_140, [64, 3072]); add_140 = None + mm_85: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None + permute_306: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_311, [1, 0]) + mm_86: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None + permute_307: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None + sum_126: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None + view_312: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_126, [3072]); sum_126 = None + permute_308: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None + view_313: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_85, [1, 64, 768]); mm_85 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_345: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None + mul_346: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, 768) + sum_127: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_345, [2], True) + mul_347: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None + sum_128: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None + mul_348: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None + sub_100: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None + sub_101: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None + mul_349: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 
= None + mul_350: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None + sum_129: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None + sum_130: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_141: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_314: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_141, [64, 768]) + mm_87: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None + permute_310: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_314, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_12: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3]) + view_17: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_12, [1, 64, 768]); permute_12 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_18: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_17, [64, 768]); view_17 = None + mm_88: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None + permute_311: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None + sum_131: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None + view_315: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_131, [768]); sum_131 = None + permute_312: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None + view_316: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_87, [1, 64, 768]); mm_87 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + view_317: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_316, [1, 64, 12, 64]); view_316 = None + permute_313: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None + getitem_174: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[0] + getitem_175: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = 
_scaled_dot_product_efficient_attention_backward_10[1] + getitem_176: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_314: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None + view_318: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_314, [1, 64, 768]); permute_314 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_315: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None + view_319: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_315, [1, 64, 768]); permute_315 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_316: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None + view_320: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_316, [1, 64, 768]); permute_316 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_10: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None + view_321: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_10, [64, 2304]); cat_10 = None + mm_89: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None + permute_318: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_321, [1, 0]) + mm_90: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None + permute_319: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None + sum_132: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None + view_322: "f32[2304][1]cuda:0" = torch.ops.aten.reshape.default(sum_132, [2304]); sum_132 = None + permute_320: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None + view_323: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_89, [1, 64, 768]); mm_89 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_352: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None + mul_353: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, 768) + sum_133: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_352, [2], True) + mul_354: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None + sum_134: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 
= None + mul_355: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None + sub_103: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None + sub_104: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None + mul_356: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None + mul_357: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None + sum_135: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None + sum_136: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_142: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:130 in forward, code: x = self.c_proj(x) + view_324: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_142, [64, 768]) + mm_91: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None + permute_322: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_324, [1, 0]) + mm_92: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None + permute_323: "f32[3072, 768][1, 3072]cuda:0" = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None + sum_137: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None + view_325: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_137, [768]); sum_137 = None + permute_324: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None + view_326: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(mm_91, [1, 64, 3072]); mm_91 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_9: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.reshape.default(addmm_2, [1, 64, 3072]); addmm_2 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_4: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_9, 0.5) + mul_358: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + pow_1: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0) + mul_5: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None + add_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None + mul_6: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None + tanh: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.tanh.default(mul_6); mul_6 = None + + # File: 
/data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:28 in new_gelu, code: 1.0 + add_7: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(tanh, 1.0) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_359: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + mul_360: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None + sub_105: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None + mul_361: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None + mul_362: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None + mul_363: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_362, 0.044715) + pow_24: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None + mul_364: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None + mul_365: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:29 in new_gelu, code: + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))) + add_143: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + mul_366: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:25 in new_gelu, code: 0.5 + add_144: "f32[1, 64, 3072][196608, 3072, 1]cuda:0" = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:128 in forward, code: x = self.c_fc(x) + view_327: "f32[64, 3072][3072, 1]cuda:0" = torch.ops.aten.reshape.default(add_144, [64, 3072]); add_144 = None + mm_93: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None + permute_326: "f32[3072, 64][1, 3072]cuda:0" = torch.ops.aten.permute.default(view_327, [1, 0]) + mm_94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None + permute_327: "f32[768, 3072][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None + sum_138: "f32[1, 3072][3072, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None + view_328: "f32[3072][1]cuda:0" = torch.ops.aten.reshape.default(sum_138, [3072]); sum_138 = None + permute_328: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None + view_329: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_93, [1, 64, 768]); mm_93 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, 
self.weight.shape, self.weight, self.bias, 1e-5) + mul_368: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None + mul_369: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, 768) + sum_139: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_368, [2], True) + mul_370: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None + sum_140: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None + mul_371: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None + sub_107: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None + sub_108: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None + mul_372: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None + mul_373: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None + sum_141: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None + sum_142: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_145: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_330: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(add_145, [64, 768]) + mm_95: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None + permute_330: "f32[768, 64][1, 768]cuda:0" = torch.ops.aten.permute.default(view_330, [1, 0]) + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 2).contiguous().view(B, T, C) + permute_4: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3]) + view_5: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_4, [1, 64, 768]); permute_4 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:116 in forward, code: y = self.resid_dropout(self.c_proj(y)) + view_6: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.reshape.default(view_5, [64, 768]); view_5 = None + mm_96: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None + permute_331: "f32[768, 768][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None + sum_143: "f32[1, 768][768, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None + view_331: "f32[768][1]cuda:0" = torch.ops.aten.reshape.default(sum_143, [768]); sum_143 = None + permute_332: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None + view_332: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_95, [1, 64, 768]); mm_95 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:112 in forward, code: y.transpose(1, 
2).contiguous().view(B, T, C) + view_333: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.reshape.default(view_332, [1, 64, 12, 64]); view_332 = None + permute_333: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:96 in forward, code: y = torch.nn.functional.scaled_dot_product_attention( + _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None + getitem_178: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[0] + getitem_179: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[1] + getitem_180: "f32[1, 12, 64, 64][49152, 64, 768, 1]cuda:0" = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:89 in forward, code: v = v.view(B, T, self.n_head, C // self.n_head).transpose( + permute_334: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None + view_334: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_334, [1, 64, 768]); permute_334 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:86 in forward, code: q = q.view(B, T, self.n_head, C // self.n_head).transpose( + permute_335: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None + view_335: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_335, [1, 64, 768]); permute_335 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:83 in forward, code: k = k.view(B, T, self.n_head, C // self.n_head).transpose( + permute_336: "f32[1, 64, 12, 64][49152, 768, 64, 1]cuda:0" = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None + view_336: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(permute_336, [1, 64, 768]); permute_336 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:82 in forward, code: q, k, v = self.c_attn(x).split(self.n_embd, dim=2) + cat_11: "f32[1, 64, 2304][147456, 2304, 1]cuda:0" = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None + view_337: "f32[64, 2304][2304, 1]cuda:0" = torch.ops.aten.reshape.default(cat_11, [64, 2304]); cat_11 = None + mm_97: "f32[64, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None + permute_338: "f32[2304, 64][1, 2304]cuda:0" = torch.ops.aten.permute.default(view_337, [1, 0]) + mm_98: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None + permute_339: "f32[768, 2304][1, 768]cuda:0" = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None + sum_144: "f32[1, 2304][2304, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None + view_338: "f32[2304][1]cuda:0" = 
torch.ops.aten.reshape.default(sum_144, [2304]); sum_144 = None + permute_340: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None + view_339: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.reshape.default(mm_97, [1, 64, 768]); mm_97 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + mul_375: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None + mul_376: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, 768) + sum_145: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_375, [2], True) + mul_377: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None + sum_146: "f32[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None + mul_378: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None + sub_110: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None + sub_111: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None + mul_379: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None + mul_380: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.mul.Tensor(view_339, mul); mul = None + sum_147: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None + sum_148: "f32[768][1]cuda:0" = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:43 in forward, code: return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + add_146: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:229 in forward, code: pos_emb = self.transformer.wpe( + eq: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(unsqueeze, -1) + unsqueeze_1: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq, -1); eq = None + full_default_4: "f32[][]cuda:0" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + where: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None + full_default_5: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + _unsafe_index_put: "f32[1024, 768][768, 1]cuda:0" = torch.ops.prims._unsafe_index_put_.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None + + # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + eq_1: "b8[1, 64][64, 1]cuda:0" = torch.ops.aten.eq.Scalar(primals_1, -1) + unsqueeze_2: "b8[1, 64, 1][64, 1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None + where_1: "f32[1, 64, 768][49152, 768, 1]cuda:0" = torch.ops.aten.where.self(unsqueeze_2, 
full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None
+ full_default_7: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+ _unsafe_index_put_1: "f32[50304, 768][768, 1]cuda:0" = torch.ops.prims._unsafe_index_put_.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None
+
+ # File: /data/users/jjwu/a/torchbenchmark/torchbenchmark/models/nanogpt/model.py:228 in forward, code: tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
+ add_147: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None
+ return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)
+
+V0806 13:56:02.116000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2d283a33d935475de25dab047e665ade"}
+ {
+ "name": "GraphLowering.run",
+ "ts": 1722977762116083.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.861000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a0389aeaab0d4cb8071bd0e8e6e32f0d"}
+ {
+ "name": "GraphLowering.run",
+ "ts": 1722977762860959.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:02.862000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "803085ff029a11e4af9c770d41ce84f2"}
+ {
+ "name": "GraphLowering.compile_to_module",
+ "ts": 1722977762862673.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.862000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7712dce48f8ad8817e3e170b1e961c78"}
+ {
+ "name": "code_gen",
+ "ts": 1722977762862772.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:02.875000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0dca915629fd3e1219b6c3b78fe58cee"}
+ {
+ "name": "Scheduler.__init__",
+ "ts": 1722977762875354.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:03.608000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a0700e4f4b7d20071f95c979dc81edc1"}
+ {
+ "name": "Scheduler.__init__",
+ "ts": 1722977763608181.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:03.608000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a788ecd1af90be6a09828353a73bab87"}
+ {
+ "name": "Scheduler.codegen",
+ "ts": 1722977763608547.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:04.390000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d9669c0f1d50d3459be407594119c03f"}
+ {
+ "name": "Scheduler.codegen",
+ "ts": 1722977764390154.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:04.390000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e9416b1bde8810f193b722a890680cd4"}
+ {
+ "name": "WrapperCodeGen.generate",
+ "ts": 1722977764390432.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:04.425000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c99ea59320891263583aa74dbedac343"}
+ {
+ "name": "WrapperCodeGen.generate",
+ "ts": 1722977764425384.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:04.426000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/52/c52u5qz657ryymezk4izvpue77cek4zew6xe5neasnfjpwi55xyg.py"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "d80ad5ece7cde8b3547e3f1f8b38e0f2"}
+
+ # AOT ID: ['0_backward']
+ from ctypes import c_void_p, c_long
+ import torch
+ import math
+ import random
+ import os
+ import tempfile
+ from math import inf, nan
+ from torch._inductor.hooks import run_intermediate_hooks
+ from torch._inductor.utils import maybe_profile
+ from torch._inductor.codegen.memory_planning import _align as align
+
+ from torch import device, empty_strided
+ from torch._inductor.async_compile import AsyncCompile
+ from torch._inductor.select_algorithm import extern_kernels
+ from torch._inductor.codegen.multi_kernel import MultiKernelCall
+
+ aten = torch.ops.aten
+ inductor_ops = torch.ops.inductor
+ _quantized = torch.ops._quantized
+ assert_size_stride = torch._C._dynamo.guards.assert_size_stride
+ empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
+ empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
+ reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
+ alloc_from_pool = torch.ops.inductor._alloc_from_pool
+ async_compile = AsyncCompile()
+
+
+ # kernel path: /tmp/tmp2ln889l5/6p/c6paekzbgxqomhhj7h4ugorx23to3z4chkxlso6qupohk2b4t6gi.py
+ # Source Nodes: [], Original ATen: [aten.new_zeros]
+
+ triton_poi_fused_new_zeros_0 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.pointwise(
+ size_hints=[65536],
+ filename=__file__,
+ triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]},
+ inductor_meta={'autotune_hints':
set(), 'kernel_name': 'triton_poi_fused_new_zeros_0', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 49152 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + import triton + import triton.language as tl + from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph + from torch._C import _cuda_getCurrentRawStream as get_raw_stream + + + # kernel path: /tmp/tmp2ln889l5/oo/coocewyva7nm7367uk2izabwjppr6kuwmmklcabvda7srgo2yjlt.py + # Source Nodes: [], Original ATen: [aten.index_put, aten.new_zeros] + + triton_poi_fused_index_put_new_zeros_1 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1024], + filename=__file__, + triton_meta={'signature': {0: '*i64', 1: '*fp32', 2: '*fp32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_index_put_new_zeros_1', 'mutated_arg_names': ['out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 768 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (0)) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK]) + tmp7 = tl.load(in_ptr1 + (x0), xmask) + tmp2 = tl.full([XBLOCK], 64, tl.int32) + tmp3 = tmp1 + tmp2 + tmp4 = tmp1 < 0 + tmp5 = tl.where(tmp4, tmp3, tmp1) + tl.device_assert((0 <= tmp5) & (tmp5 < 64), "index out of bounds: 0 <= tmp5 < 64") + tl.atomic_add(out_ptr0 + (x0 + (768*tmp5)), tmp7, xmask, sem='relaxed') + ''', device_str='cuda') + + + # kernel path: 
/tmp/tmp2ln889l5/cz/cczmlfpjyv3sv4qsg47uepi425idtn7lrru6wzh6tqpngryvmley.py + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + + triton_per_fused_native_layer_norm_backward_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_native_layer_norm_backward_2', 'mutated_arg_names': [], 'no_x_dim': True, 'num_load': 4, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, out_ptr2, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp14 = 768.0 + tmp15 = tmp2 * tmp14 + tmp16 = tmp15 - tmp6 + tmp17 = tmp7 * tmp12 + tmp18 = tmp16 - tmp17 + tmp19 = tmp13 * tmp18 + tl.store(out_ptr2 + (r1 + (768*x0)), tmp19, rmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ha/chal7ghh7ewcpd75gvkwva2pij3u5ofczztskcbubk3ioaefsaz6.py + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + + triton_per_fused_native_layer_norm_backward_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, 
instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[1024, 64], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32', 5: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_native_layer_norm_backward_3', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, out_ptr0, out_ptr1, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 768 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (768*r1)), xmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (x0 + (768*r1)), xmask, other=0.0) + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [XBLOCK, RBLOCK]) + tmp5 = tl.where(xmask, tmp3, 0) + tmp6 = tl.sum(tmp5, 1)[:, None] + tmp7 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp9 = tl.where(xmask, tmp7, 0) + tmp10 = tl.sum(tmp9, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp6, xmask) + tl.store(out_ptr1 + (x0), tmp10, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/oq/coq4a53onebiitbgr5qfnlwe4gtsq6tfdkhu4jc2ah74b22q6frr.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[1024, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_4', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 
'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 768 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (768*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/6d/c6doghxt344wzvcsvrtcaftqayqnzocngd56v2yhsnz3v5eopk2f.py + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + # add_46 => add_94 + # add_47 => add_95 + # mul_44 => mul_92 + # mul_45 => mul_93 + # mul_46 => mul_94 + # pow_12 => pow_12 + # tanh_11 => tanh_11 + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_mul_pow_tanh_tanh_backward_5', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 196608 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_out_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr0 + (x0), None) + tmp2 = 0.5 + tmp3 = tmp1 * tmp2 + tmp4 = tmp0 * tmp3 + tmp5 = tmp1 * tmp1 + tmp6 = tmp5 * tmp1 + tmp7 = 0.044715 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 + tmp8 + tmp10 = 0.7978845608028654 + tmp11 = tmp9 * tmp10 + tmp12 = libdevice.tanh(tmp11) + tmp13 = tmp12 * tmp12 + tmp14 = 1.0 + tmp15 = tmp14 - tmp13 + tmp16 = tmp4 * tmp15 + tmp17 = tmp16 * tmp10 + tmp18 = tmp17 * tmp7 + tmp19 = 3.0 + tmp20 = tmp5 * tmp19 + tmp21 = tmp18 * tmp20 + tmp22 = tmp17 + tmp21 + tmp23 = tmp12 + tmp14 + tmp24 
= tmp0 * tmp23 + tmp25 = tmp24 * tmp2 + tmp26 = tmp22 + tmp25 + tl.store(in_out_ptr0 + (x0), tmp26, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/tp/ctpgg42pkdcfpn2vurm3qnozt4yd6qbtncxrhvxbhiy6psqbvvig.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[4096, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_6', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 3072 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (3072*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/vc/cvcsfabswaorzljwigmguorguvjnz5ygwefg5jpn2pdnzjiyiunq.py + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + + triton_per_fused_add_native_layer_norm_backward_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 
5, 6), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_native_layer_norm_backward_7', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': True, 'num_load': 5, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp14 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp15 = 768.0 + tmp16 = tmp2 * tmp15 + tmp17 = tmp16 - tmp6 + tmp18 = tmp7 * tmp12 + tmp19 = tmp17 - tmp18 + tmp20 = tmp14 * tmp19 + tmp21 = tmp13 + tmp20 + tl.store(in_out_ptr0 + (r1 + (768*x0)), tmp21, rmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ar/cardc3vznbmvvhhk6ledk62xbqafohguorygjv35ygndxybgtp4v.py + # Source Nodes: [], Original ATen: [aten.cat] + + triton_poi_fused_cat_8 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[262144], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_cat_8', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 3, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 
'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 147456 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex % 2304 + x1 = (xindex // 2304) + x2 = xindex + tmp0 = x0 + tmp1 = tl.full([1], 0, tl.int64) + tmp2 = tmp0 >= tmp1 + tmp3 = tl.full([1], 768, tl.int64) + tmp4 = tmp0 < tmp3 + tmp5 = tl.load(in_ptr0 + ((768*x1) + x0), tmp4, eviction_policy='evict_last', other=0.0) + tmp6 = tmp0 >= tmp3 + tmp7 = tl.full([1], 1536, tl.int64) + tmp8 = tmp0 < tmp7 + tmp9 = tmp6 & tmp8 + tmp10 = tl.load(in_ptr1 + ((768*x1) + ((-768) + x0)), tmp9, eviction_policy='evict_last', other=0.0) + tmp11 = tmp0 >= tmp7 + tmp12 = tl.full([1], 2304, tl.int64) + tmp13 = tmp0 < tmp12 + tmp14 = tl.load(in_ptr2 + ((768*x1) + ((-1536) + x0)), tmp11, eviction_policy='evict_last', other=0.0) + tmp15 = tl.where(tmp9, tmp10, tmp14) + tmp16 = tl.where(tmp4, tmp5, tmp15) + tl.store(out_ptr0 + (x2), tmp16, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/zz/czzbnoytzj35xlzlzkxyfssawly4isbfgis7wkt5vorq5bjtvkuj.py + # Source Nodes: [], Original ATen: [aten.sum] + + triton_per_fused_sum_9 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[4096, 64], + reduction_hint=ReductionHint.OUTER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32', 3: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_sum_9', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 1, 'num_reduction': 1, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_ptr0, out_ptr0, xnumel, rnumel, XBLOCK : tl.constexpr): + xnumel = 2304 + rnumel = 64 + RBLOCK: tl.constexpr = 64 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:, None] + xmask = xindex < xnumel + rindex = tl.arange(0, RBLOCK)[None, :] + roffset = 0 + rmask = tl.full([XBLOCK, RBLOCK], True, tl.int1) + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0 + (2304*r1)), xmask, other=0.0) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK, RBLOCK]) + tmp3 = tl.where(xmask, tmp1, 0) + tmp4 = tl.sum(tmp3, 1)[:, None] + tl.store(out_ptr0 + (x0), tmp4, xmask) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/es/ces5mfu37mqyysd7bqz7jj4nzmf5iy36y733ltbjl32z622kywum.py + # Source Nodes: [], 
Original ATen: [aten.embedding_dense_backward] + + triton_poi_fused_embedding_dense_backward_10 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[1048576], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_embedding_dense_backward_10', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 786432 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/fb/cfbt4gzmmsu52tt6jq2ejhclvei7g2bvocxc3tptmo2jt2vjqwrw.py + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + + triton_poi_fused_embedding_dense_backward_11 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[67108864], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_embedding_dense_backward_11', 'mutated_arg_names': [], 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def 
triton_(out_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 38633472 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = 0.0 + tl.store(out_ptr0 + (x0), tmp0, None) + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ax/cax4blfw6cfz2t7paehqtfiuyq5xduvu52edbcakzqd6prmrjf5o.py + # Source Nodes: [], Original ATen: [aten.add, aten.embedding_dense_backward, aten.native_layer_norm_backward] + + triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.persistent_reduction( + size_hints=[64, 1024], + reduction_hint=ReductionHint.INNER, + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*i64', 6: '*i64', 7: '*fp32', 8: '*fp32', 9: 'i32', 10: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12', 'mutated_arg_names': ['in_out_ptr0', 'out_ptr2', 'out_ptr3'], 'no_x_dim': True, 'num_load': 7, 'num_reduction': 2, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False} + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, out_ptr2, out_ptr3, xnumel, rnumel): + xnumel = 64 + XBLOCK: tl.constexpr = 1 + rnumel = 768 + RBLOCK: tl.constexpr = 1024 + xoffset = tl.program_id(0) * XBLOCK + xindex = tl.full([1], xoffset, tl.int32) + xmask = tl.full([RBLOCK], True, tl.int1) + rindex = tl.arange(0, RBLOCK)[:] + roffset = 0 + rmask = rindex < rnumel + r1 = rindex + x0 = xindex + tmp0 = tl.load(in_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp1 = tl.load(in_ptr1 + (r1), rmask, eviction_policy='evict_last', other=0.0) + tmp7 = tl.load(in_ptr2 + (r1 + (768*x0)), rmask, other=0.0) + tmp13 = tl.load(in_out_ptr0 + (r1 + (768*x0)), rmask, other=0.0) + tmp14 = tl.load(in_ptr3 + (x0), None, eviction_policy='evict_last') + tmp22 = tl.load(in_ptr4 + (x0), None, eviction_policy='evict_last') + tmp31 = tl.load(in_ptr5 + (x0), None, eviction_policy='evict_last') + tmp2 = tmp0 * tmp1 + tmp3 = tl.broadcast_to(tmp2, [RBLOCK]) + tmp5 = tl.where(rmask, tmp3, 0) + tmp6 = triton_helpers.promote_to_tensor(tl.sum(tmp5, 0)) + tmp8 = tmp2 * tmp7 + tmp9 = tl.broadcast_to(tmp8, [RBLOCK]) + tmp11 = tl.where(rmask, tmp9, 0) + tmp12 = triton_helpers.promote_to_tensor(tl.sum(tmp11, 0)) + tmp15 = 768.0 + tmp16 = tmp2 * tmp15 + tmp17 = tmp16 - 
tmp6 + tmp18 = tmp7 * tmp12 + tmp19 = tmp17 - tmp18 + tmp20 = tmp14 * tmp19 + tmp21 = tmp13 + tmp20 + tmp23 = tl.full([RBLOCK], 1024, tl.int32) + tmp24 = tmp22 + tmp23 + tmp25 = tmp22 < 0 + tmp26 = tl.where(tmp25, tmp24, tmp22) + tmp27 = tl.full([1], -1, tl.int64) + tmp28 = tmp22 == tmp27 + tmp29 = 0.0 + tmp30 = tl.where(tmp28, tmp29, tmp21) + tmp32 = tl.full([RBLOCK], 50304, tl.int32) + tmp33 = tmp31 + tmp32 + tmp34 = tmp31 < 0 + tmp35 = tl.where(tmp34, tmp33, tmp31) + tmp36 = tmp31 == tmp27 + tmp37 = tl.where(tmp36, tmp29, tmp21) + tl.atomic_add(out_ptr2 + (tl.broadcast_to(r1 + (768*tmp26), [RBLOCK])), tmp30, rmask, sem='relaxed') + tl.atomic_add(out_ptr3 + (tl.broadcast_to(r1 + (768*tmp35), [RBLOCK])), tmp37, rmask, sem='relaxed') + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/os/cosr4qs4msiqxzvk25xnlu3ejk5hkeeghm6yyzr6fh3yejtndtoa.py + # Source Nodes: [], Original ATen: [aten.add] + + triton_poi_fused_add_13 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.pointwise( + size_hints=[67108864], + filename=__file__, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]}, + inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_add_13', 'mutated_arg_names': ['in_out_ptr0'], 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + min_elem_per_thread=0 + ) + @triton.jit + def triton_(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr): + xnumel = 38633472 + xoffset = tl.program_id(0) * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_out_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr0 + (x0), None) + tmp2 = tmp0 + tmp1 + tl.store(in_out_ptr0 + (x0), tmp2, None) + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, 
getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1 = args + args.clear() + assert_size_stride(primals_1, (1, 64), (64, 1)) + assert_size_stride(primals_4, (768, ), (1, )) + assert_size_stride(primals_10, (768, ), (1, )) + assert_size_stride(primals_16, (768, ), (1, )) + assert_size_stride(primals_22, (768, ), (1, )) + assert_size_stride(primals_28, (768, ), (1, )) + assert_size_stride(primals_34, (768, ), (1, )) + assert_size_stride(primals_40, (768, ), (1, )) + assert_size_stride(primals_46, (768, ), (1, )) + assert_size_stride(primals_52, (768, ), (1, )) + assert_size_stride(primals_58, (768, ), (1, )) + assert_size_stride(primals_64, (768, ), (1, )) + assert_size_stride(primals_70, (768, ), (1, )) + assert_size_stride(primals_76, (768, ), (1, )) + assert_size_stride(primals_82, (768, ), (1, )) + assert_size_stride(primals_88, (768, ), (1, )) + assert_size_stride(primals_94, (768, ), (1, )) + assert_size_stride(primals_100, (768, ), (1, )) + assert_size_stride(primals_106, (768, ), (1, )) + assert_size_stride(primals_112, (768, ), (1, )) + assert_size_stride(primals_118, (768, ), (1, )) + assert_size_stride(primals_124, (768, ), (1, )) + assert_size_stride(primals_130, (768, ), (1, )) + assert_size_stride(primals_136, (768, ), (1, )) + assert_size_stride(primals_142, (768, ), (1, )) + assert_size_stride(primals_148, (768, ), (1, )) + assert_size_stride(unsqueeze, (1, 64), (64, 1)) + 
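+ # Every saved forward tensor is re-checked with assert_size_stride before any
+ # kernel runs: the compiled kernels bake in these exact sizes and strides, so a
+ # mismatch raises immediately instead of reading memory with the wrong layout.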
assert_size_stride(mul, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view, (64, 768), (768, 1)) + assert_size_stride(permute_1, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_2, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_3, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_5, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_6, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_7, (), ()) + assert_size_stride(getitem_8, (), ()) + assert_size_stride(mul_2, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_8, (64, 768), (768, 1)) + assert_size_stride(addmm_2, (64, 3072), (3072, 1)) + assert_size_stride(view_10, (64, 3072), (3072, 1)) + assert_size_stride(mul_8, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_12, (64, 768), (768, 1)) + assert_size_stride(permute_9, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_10, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_11, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_16, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_17, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_18, (), ()) + assert_size_stride(getitem_19, (), ()) + assert_size_stride(mul_10, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_20, (64, 768), (768, 1)) + assert_size_stride(addmm_6, (64, 3072), (3072, 1)) + assert_size_stride(view_22, (64, 3072), (3072, 1)) + assert_size_stride(mul_16, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_24, (64, 768), (768, 1)) + assert_size_stride(permute_17, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_18, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_19, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_27, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_28, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_29, (), ()) + assert_size_stride(getitem_30, (), ()) + assert_size_stride(mul_18, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_32, (64, 768), (768, 1)) + assert_size_stride(addmm_10, (64, 3072), (3072, 1)) + assert_size_stride(view_34, (64, 3072), (3072, 1)) + assert_size_stride(mul_24, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_36, (64, 768), (768, 1)) + assert_size_stride(permute_25, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_26, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_27, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_38, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_39, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_40, (), ()) + assert_size_stride(getitem_41, (), ()) + assert_size_stride(mul_26, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_44, (64, 768), (768, 1)) + assert_size_stride(addmm_14, (64, 3072), (3072, 1)) + assert_size_stride(view_46, (64, 3072), (3072, 1)) + assert_size_stride(mul_32, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_48, (64, 768), (768, 1)) + assert_size_stride(permute_33, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_34, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_35, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_49, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_50, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_51, (), ()) + 
assert_size_stride(getitem_52, (), ()) + assert_size_stride(mul_34, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_56, (64, 768), (768, 1)) + assert_size_stride(addmm_18, (64, 3072), (3072, 1)) + assert_size_stride(view_58, (64, 3072), (3072, 1)) + assert_size_stride(mul_40, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_60, (64, 768), (768, 1)) + assert_size_stride(permute_41, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_42, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_43, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_60, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_61, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_62, (), ()) + assert_size_stride(getitem_63, (), ()) + assert_size_stride(mul_42, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_68, (64, 768), (768, 1)) + assert_size_stride(addmm_22, (64, 3072), (3072, 1)) + assert_size_stride(view_70, (64, 3072), (3072, 1)) + assert_size_stride(mul_48, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_72, (64, 768), (768, 1)) + assert_size_stride(permute_49, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_50, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_51, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_71, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_72, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_73, (), ()) + assert_size_stride(getitem_74, (), ()) + assert_size_stride(mul_50, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_80, (64, 768), (768, 1)) + assert_size_stride(addmm_26, (64, 3072), (3072, 1)) + assert_size_stride(view_82, (64, 3072), (3072, 1)) + assert_size_stride(mul_56, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_84, (64, 768), (768, 1)) + assert_size_stride(permute_57, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_58, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_59, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_82, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_83, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_84, (), ()) + assert_size_stride(getitem_85, (), ()) + assert_size_stride(mul_58, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_92, (64, 768), (768, 1)) + assert_size_stride(addmm_30, (64, 3072), (3072, 1)) + assert_size_stride(view_94, (64, 3072), (3072, 1)) + assert_size_stride(mul_64, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_96, (64, 768), (768, 1)) + assert_size_stride(permute_65, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_66, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_67, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_93, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_94, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_95, (), ()) + assert_size_stride(getitem_96, (), ()) + assert_size_stride(mul_66, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_104, (64, 768), (768, 1)) + assert_size_stride(addmm_34, (64, 3072), (3072, 1)) + assert_size_stride(view_106, (64, 3072), (3072, 1)) + assert_size_stride(mul_72, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_108, (64, 768), (768, 1)) + assert_size_stride(permute_73, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_74, (1, 12, 64, 64), (147456, 64, 
2304, 1)) + assert_size_stride(permute_75, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_104, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_105, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_106, (), ()) + assert_size_stride(getitem_107, (), ()) + assert_size_stride(mul_74, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_116, (64, 768), (768, 1)) + assert_size_stride(addmm_38, (64, 3072), (3072, 1)) + assert_size_stride(view_118, (64, 3072), (3072, 1)) + assert_size_stride(mul_80, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_120, (64, 768), (768, 1)) + assert_size_stride(permute_81, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_82, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_83, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_115, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_116, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_117, (), ()) + assert_size_stride(getitem_118, (), ()) + assert_size_stride(mul_82, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_128, (64, 768), (768, 1)) + assert_size_stride(addmm_42, (64, 3072), (3072, 1)) + assert_size_stride(view_130, (64, 3072), (3072, 1)) + assert_size_stride(mul_88, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_132, (64, 768), (768, 1)) + assert_size_stride(permute_89, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_90, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(permute_91, (1, 12, 64, 64), (147456, 64, 2304, 1)) + assert_size_stride(getitem_126, (1, 12, 64, 64), (49152, 64, 768, 1)) + assert_size_stride(getitem_127, (1, 12, 64), (768, 64, 1)) + assert_size_stride(getitem_128, (), ()) + assert_size_stride(getitem_129, (), ()) + assert_size_stride(mul_90, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(view_140, (64, 768), (768, 1)) + assert_size_stride(addmm_46, (64, 3072), (3072, 1)) + assert_size_stride(view_142, (64, 3072), (3072, 1)) + assert_size_stride(mul_96, (1, 64, 768), (49152, 768, 1)) + assert_size_stride(full_default, (1, ), (1, )) + assert_size_stride(view_144, (1, 768), (768, 1)) + assert_size_stride(permute_99, (50304, 768), (768, 1)) + assert_size_stride(div, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_101, (768, 3072), (3072, 1)) + assert_size_stride(permute_105, (3072, 768), (768, 1)) + assert_size_stride(div_1, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_109, (768, 768), (768, 1)) + assert_size_stride(permute_117, (2304, 768), (768, 1)) + assert_size_stride(div_2, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_121, (768, 3072), (3072, 1)) + assert_size_stride(permute_125, (3072, 768), (768, 1)) + assert_size_stride(div_3, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_129, (768, 768), (768, 1)) + assert_size_stride(permute_137, (2304, 768), (768, 1)) + assert_size_stride(div_4, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_141, (768, 3072), (3072, 1)) + assert_size_stride(permute_145, (3072, 768), (768, 1)) + assert_size_stride(div_5, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_149, (768, 768), (768, 1)) + assert_size_stride(permute_157, (2304, 768), (768, 1)) + assert_size_stride(div_6, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_161, (768, 3072), (3072, 1)) + assert_size_stride(permute_165, (3072, 768), (768, 1)) + assert_size_stride(div_7, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_169, (768, 768), (768, 1)) + 
assert_size_stride(permute_177, (2304, 768), (768, 1)) + assert_size_stride(div_8, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_181, (768, 3072), (3072, 1)) + assert_size_stride(permute_185, (3072, 768), (768, 1)) + assert_size_stride(div_9, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_189, (768, 768), (768, 1)) + assert_size_stride(permute_197, (2304, 768), (768, 1)) + assert_size_stride(div_10, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_201, (768, 3072), (3072, 1)) + assert_size_stride(permute_205, (3072, 768), (768, 1)) + assert_size_stride(div_11, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_209, (768, 768), (768, 1)) + assert_size_stride(permute_217, (2304, 768), (768, 1)) + assert_size_stride(div_12, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_221, (768, 3072), (3072, 1)) + assert_size_stride(permute_225, (3072, 768), (768, 1)) + assert_size_stride(div_13, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_229, (768, 768), (768, 1)) + assert_size_stride(permute_237, (2304, 768), (768, 1)) + assert_size_stride(div_14, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_241, (768, 3072), (3072, 1)) + assert_size_stride(permute_245, (3072, 768), (768, 1)) + assert_size_stride(div_15, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_249, (768, 768), (768, 1)) + assert_size_stride(permute_257, (2304, 768), (768, 1)) + assert_size_stride(div_16, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_261, (768, 3072), (3072, 1)) + assert_size_stride(permute_265, (3072, 768), (768, 1)) + assert_size_stride(div_17, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_269, (768, 768), (768, 1)) + assert_size_stride(permute_277, (2304, 768), (768, 1)) + assert_size_stride(div_18, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_281, (768, 3072), (3072, 1)) + assert_size_stride(permute_285, (3072, 768), (768, 1)) + assert_size_stride(div_19, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_289, (768, 768), (768, 1)) + assert_size_stride(permute_297, (2304, 768), (768, 1)) + assert_size_stride(div_20, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_301, (768, 3072), (3072, 1)) + assert_size_stride(permute_305, (3072, 768), (768, 1)) + assert_size_stride(div_21, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_309, (768, 768), (768, 1)) + assert_size_stride(permute_317, (2304, 768), (768, 1)) + assert_size_stride(div_22, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_321, (768, 3072), (3072, 1)) + assert_size_stride(permute_325, (3072, 768), (768, 1)) + assert_size_stride(div_23, (1, 64, 1), (64, 1, 1)) + assert_size_stride(permute_329, (768, 768), (768, 1)) + assert_size_stride(permute_337, (2304, 768), (768, 1)) + assert_size_stride(div_24, (1, 64, 1), (64, 1, 1)) + assert_size_stride(tangents_1, (1, 1, 50304), (50304, 50304, 1)) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + buf0 = empty_strided_cuda((50304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(tangents_1, (50304, 1), (1, 50304), 0), view_144, out=buf0) + del view_144 + buf1 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(tangents_1, (1, 50304), (50304, 1), 0), permute_99, out=buf1) + del permute_99 + del tangents_1 + buf2 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.new_zeros] + stream0 = get_raw_stream(0) + 
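+ # The two extern mm calls above compute the lm_head gradients, and buf2 is about
+ # to be zero-filled so the embedding gradients can be accumulated into it by the
+ # atomic-add scatter kernels. A rough eager-mode sketch of the mm pair (the names
+ # grad_W, grad_x, g, W are illustrative, not from the generated code):
+ #   grad_W = g.reshape(50304, 1) @ view_144   # tangents^T @ activations -> (50304, 768)
+ #   grad_x = g.reshape(1, 50304) @ W          # tangents @ lm_head weight  -> (1, 768)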
triton_poi_fused_new_zeros_0.run(buf2, 49152, grid=grid(49152), stream=stream0) + # Source Nodes: [], Original ATen: [aten.index_put, aten.new_zeros] + triton_poi_fused_index_put_new_zeros_1.run(full_default, buf1, buf2, 768, grid=grid(768), stream=stream0) + del full_default + buf6 = empty_strided_cuda((1, 64, 768), (49152, 768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_2.run(buf2, primals_148, mul_96, div, buf6, 64, 768, grid=grid(64), stream=stream0) + del div + del primals_148 + buf7 = reinterpret_tensor(buf1, (768, ), (1, ), 0); del buf1 # reuse + buf8 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf2, mul_96, buf7, buf8, 768, 64, grid=grid(768), stream=stream0) + del mul_96 + buf9 = empty_strided_cuda((64, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf6, (64, 768), (768, 1), 0), permute_101, out=buf9) + del permute_101 + buf10 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf6, (768, 64), (1, 768), 0), view_142, out=buf10) + del view_142 + buf11 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf6, buf11, 768, 64, grid=grid(768), stream=stream0) + buf12 = reinterpret_tensor(buf9, (1, 64, 3072), (196608, 3072, 1), 0); del buf9 # reuse + # Source Nodes: [add_46, add_47, mul_44, mul_45, mul_46, pow_12, tanh_11], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf12, addmm_46, 196608, grid=grid(196608), stream=stream0) + del addmm_46 + buf13 = reinterpret_tensor(buf2, (64, 768), (768, 1), 0); del buf2 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf12, (64, 3072), (3072, 1), 0), permute_105, out=buf13) + del permute_105 + buf14 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf12, (3072, 64), (1, 3072), 0), view_140, out=buf14) + del view_140 + buf15 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf12, buf15, 3072, 64, grid=grid(3072), stream=stream0) + buf20 = buf6; del buf6 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf20, buf13, primals_142, mul_90, div_1, 64, 768, grid=grid(64), stream=stream0) + del div_1 + del primals_142 + buf18 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf19 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf13, mul_90, buf18, buf19, 768, 64, grid=grid(768), stream=stream0) + del mul_90 + buf21 = buf13; del buf13 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf20, (64, 768), (768, 1), 0), permute_109, out=buf21) + del permute_109 + buf22 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf20, 
(768, 64), (1, 768), 0), reinterpret_tensor(getitem_126, (64, 768), (768, 1), 0), out=buf22) + buf23 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf20, buf23, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf24 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf21, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True) + del buf21 + del getitem_126 + del getitem_127 + del getitem_128 + del getitem_129 + del permute_89 + del permute_90 + del permute_91 + buf25 = buf24[0] + buf26 = buf24[1] + buf27 = buf24[2] + del buf24 + buf28 = empty_strided_cuda((1, 64, 2304), (147456, 2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf25, buf26, buf27, buf28, 147456, grid=grid(147456), stream=stream0) + del buf25 + del buf26 + buf29 = reinterpret_tensor(buf27, (64, 768), (768, 1), 0); del buf27 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf28, (64, 2304), (2304, 1), 0), permute_117, out=buf29) + del permute_117 + buf30 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf28, (2304, 64), (1, 2304), 0), view_132, out=buf30) + del view_132 + buf31 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf28, buf31, 2304, 64, grid=grid(2304), stream=stream0) + buf36 = buf20; del buf20 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf36, buf29, primals_136, mul_88, div_2, 64, 768, grid=grid(64), stream=stream0) + del div_2 + del primals_136 + buf34 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf35 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf29, mul_88, buf34, buf35, 768, 64, grid=grid(768), stream=stream0) + del mul_88 + buf37 = reinterpret_tensor(buf12, (64, 3072), (3072, 1), 0); del buf12 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf36, (64, 768), (768, 1), 0), permute_121, out=buf37) + del permute_121 + buf38 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf36, (768, 64), (1, 768), 0), view_130, out=buf38) + del view_130 + buf39 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf36, buf39, 768, 64, grid=grid(768), stream=stream0) + buf40 = reinterpret_tensor(buf37, (1, 64, 3072), (196608, 3072, 1), 0); del buf37 # reuse + # Source Nodes: [add_42, add_43, mul_40, mul_41, mul_42, pow_11, tanh_10], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf40, addmm_42, 196608, grid=grid(196608), stream=stream0) + del addmm_42 + buf41 = buf29; del buf29 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + 
extern_kernels.mm(reinterpret_tensor(buf40, (64, 3072), (3072, 1), 0), permute_125, out=buf41) + del permute_125 + buf42 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf40, (3072, 64), (1, 3072), 0), view_128, out=buf42) + del view_128 + buf43 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf40, buf43, 3072, 64, grid=grid(3072), stream=stream0) + buf48 = buf36; del buf36 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf48, buf41, primals_130, mul_82, div_3, 64, 768, grid=grid(64), stream=stream0) + del div_3 + del primals_130 + buf46 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf47 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf41, mul_82, buf46, buf47, 768, 64, grid=grid(768), stream=stream0) + del mul_82 + buf49 = buf41; del buf41 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf48, (64, 768), (768, 1), 0), permute_129, out=buf49) + del permute_129 + buf50 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf48, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_115, (64, 768), (768, 1), 0), out=buf50) + buf51 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf48, buf51, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf52 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf49, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True) + del buf49 + del getitem_115 + del getitem_116 + del getitem_117 + del getitem_118 + del permute_81 + del permute_82 + del permute_83 + buf53 = buf52[0] + buf54 = buf52[1] + buf55 = buf52[2] + del buf52 + buf56 = buf28; del buf28 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf53, buf54, buf55, buf56, 147456, grid=grid(147456), stream=stream0) + del buf53 + del buf54 + buf57 = reinterpret_tensor(buf55, (64, 768), (768, 1), 0); del buf55 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf56, (64, 2304), (2304, 1), 0), permute_137, out=buf57) + del permute_137 + buf58 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf56, (2304, 64), (1, 2304), 0), view_120, out=buf58) + del view_120 + buf59 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf56, buf59, 2304, 64, grid=grid(2304), stream=stream0) + buf64 = buf48; del buf48 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf64, buf57, primals_124, mul_80, div_4, 64, 768, grid=grid(64), stream=stream0) + del div_4 + del primals_124 + buf62 = 
empty_strided_cuda((768, ), (1, ), torch.float32) + buf63 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf57, mul_80, buf62, buf63, 768, 64, grid=grid(768), stream=stream0) + del mul_80 + buf65 = reinterpret_tensor(buf40, (64, 3072), (3072, 1), 0); del buf40 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf64, (64, 768), (768, 1), 0), permute_141, out=buf65) + del permute_141 + buf66 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf64, (768, 64), (1, 768), 0), view_118, out=buf66) + del view_118 + buf67 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf64, buf67, 768, 64, grid=grid(768), stream=stream0) + buf68 = reinterpret_tensor(buf65, (1, 64, 3072), (196608, 3072, 1), 0); del buf65 # reuse + # Source Nodes: [add_38, add_39, mul_36, mul_37, mul_38, pow_10, tanh_9], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf68, addmm_38, 196608, grid=grid(196608), stream=stream0) + del addmm_38 + buf69 = buf57; del buf57 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf68, (64, 3072), (3072, 1), 0), permute_145, out=buf69) + del permute_145 + buf70 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf68, (3072, 64), (1, 3072), 0), view_116, out=buf70) + del view_116 + buf71 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf68, buf71, 3072, 64, grid=grid(3072), stream=stream0) + buf76 = buf64; del buf64 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf76, buf69, primals_118, mul_74, div_5, 64, 768, grid=grid(64), stream=stream0) + del div_5 + del primals_118 + buf74 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf75 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf69, mul_74, buf74, buf75, 768, 64, grid=grid(768), stream=stream0) + del mul_74 + buf77 = buf69; del buf69 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf76, (64, 768), (768, 1), 0), permute_149, out=buf77) + del permute_149 + buf78 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf76, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_104, (64, 768), (768, 1), 0), out=buf78) + buf79 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf76, buf79, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf80 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf77, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, 
getitem_107, 0.0, [True, True, True, False], True) + del buf77 + del getitem_104 + del getitem_105 + del getitem_106 + del getitem_107 + del permute_73 + del permute_74 + del permute_75 + buf81 = buf80[0] + buf82 = buf80[1] + buf83 = buf80[2] + del buf80 + buf84 = buf56; del buf56 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf81, buf82, buf83, buf84, 147456, grid=grid(147456), stream=stream0) + del buf81 + del buf82 + buf85 = reinterpret_tensor(buf83, (64, 768), (768, 1), 0); del buf83 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf84, (64, 2304), (2304, 1), 0), permute_157, out=buf85) + del permute_157 + buf86 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf84, (2304, 64), (1, 2304), 0), view_108, out=buf86) + del view_108 + buf87 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf84, buf87, 2304, 64, grid=grid(2304), stream=stream0) + buf92 = buf76; del buf76 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf92, buf85, primals_112, mul_72, div_6, 64, 768, grid=grid(64), stream=stream0) + del div_6 + del primals_112 + buf90 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf91 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf85, mul_72, buf90, buf91, 768, 64, grid=grid(768), stream=stream0) + del mul_72 + buf93 = reinterpret_tensor(buf68, (64, 3072), (3072, 1), 0); del buf68 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf92, (64, 768), (768, 1), 0), permute_161, out=buf93) + del permute_161 + buf94 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf92, (768, 64), (1, 768), 0), view_106, out=buf94) + del view_106 + buf95 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf92, buf95, 768, 64, grid=grid(768), stream=stream0) + buf96 = reinterpret_tensor(buf93, (1, 64, 3072), (196608, 3072, 1), 0); del buf93 # reuse + # Source Nodes: [add_34, add_35, mul_32, mul_33, mul_34, pow_9, tanh_8], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf96, addmm_34, 196608, grid=grid(196608), stream=stream0) + del addmm_34 + buf97 = buf85; del buf85 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf96, (64, 3072), (3072, 1), 0), permute_165, out=buf97) + del permute_165 + buf98 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf96, (3072, 64), (1, 3072), 0), view_104, out=buf98) + del view_104 + buf99 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf96, buf99, 3072, 64, grid=grid(3072), stream=stream0) + buf104 = buf92; del buf92 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + 
triton_per_fused_add_native_layer_norm_backward_7.run(buf104, buf97, primals_106, mul_66, div_7, 64, 768, grid=grid(64), stream=stream0) + del div_7 + del primals_106 + buf102 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf103 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf97, mul_66, buf102, buf103, 768, 64, grid=grid(768), stream=stream0) + del mul_66 + buf105 = buf97; del buf97 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf104, (64, 768), (768, 1), 0), permute_169, out=buf105) + del permute_169 + buf106 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf104, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_93, (64, 768), (768, 1), 0), out=buf106) + buf107 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf104, buf107, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf108 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf105, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True) + del buf105 + del getitem_93 + del getitem_94 + del getitem_95 + del getitem_96 + del permute_65 + del permute_66 + del permute_67 + buf109 = buf108[0] + buf110 = buf108[1] + buf111 = buf108[2] + del buf108 + buf112 = buf84; del buf84 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf109, buf110, buf111, buf112, 147456, grid=grid(147456), stream=stream0) + del buf109 + del buf110 + buf113 = reinterpret_tensor(buf111, (64, 768), (768, 1), 0); del buf111 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf112, (64, 2304), (2304, 1), 0), permute_177, out=buf113) + del permute_177 + buf114 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf112, (2304, 64), (1, 2304), 0), view_96, out=buf114) + del view_96 + buf115 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf112, buf115, 2304, 64, grid=grid(2304), stream=stream0) + buf120 = buf104; del buf104 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf120, buf113, primals_100, mul_64, div_8, 64, 768, grid=grid(64), stream=stream0) + del div_8 + del primals_100 + buf118 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf119 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf113, mul_64, buf118, buf119, 768, 64, grid=grid(768), stream=stream0) + del mul_64 + buf121 = reinterpret_tensor(buf96, (64, 3072), (3072, 1), 0); del buf96 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf120, (64, 768), (768, 1), 0), permute_181, out=buf121) + del permute_181 + buf122 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # 
Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf120, (768, 64), (1, 768), 0), view_94, out=buf122) + del view_94 + buf123 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf120, buf123, 768, 64, grid=grid(768), stream=stream0) + buf124 = reinterpret_tensor(buf121, (1, 64, 3072), (196608, 3072, 1), 0); del buf121 # reuse + # Source Nodes: [add_30, add_31, mul_28, mul_29, mul_30, pow_8, tanh_7], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf124, addmm_30, 196608, grid=grid(196608), stream=stream0) + del addmm_30 + buf125 = buf113; del buf113 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf124, (64, 3072), (3072, 1), 0), permute_185, out=buf125) + del permute_185 + buf126 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf124, (3072, 64), (1, 3072), 0), view_92, out=buf126) + del view_92 + buf127 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf124, buf127, 3072, 64, grid=grid(3072), stream=stream0) + buf132 = buf120; del buf120 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf132, buf125, primals_94, mul_58, div_9, 64, 768, grid=grid(64), stream=stream0) + del div_9 + del primals_94 + buf130 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf131 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf125, mul_58, buf130, buf131, 768, 64, grid=grid(768), stream=stream0) + del mul_58 + buf133 = buf125; del buf125 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf132, (64, 768), (768, 1), 0), permute_189, out=buf133) + del permute_189 + buf134 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf132, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_82, (64, 768), (768, 1), 0), out=buf134) + buf135 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf132, buf135, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf136 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf133, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True) + del buf133 + del getitem_82 + del getitem_83 + del getitem_84 + del getitem_85 + del permute_57 + del permute_58 + del permute_59 + buf137 = buf136[0] + buf138 = buf136[1] + buf139 = buf136[2] + del buf136 + buf140 = buf112; del buf112 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf137, buf138, buf139, buf140, 147456, grid=grid(147456), stream=stream0) + del buf137 + del buf138 + buf141 = reinterpret_tensor(buf139, (64, 768), (768, 1), 0); del buf139 # reuse + # Source Nodes: [], Original ATen: [aten.mm] 
+ extern_kernels.mm(reinterpret_tensor(buf140, (64, 2304), (2304, 1), 0), permute_197, out=buf141) + del permute_197 + buf142 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf140, (2304, 64), (1, 2304), 0), view_84, out=buf142) + del view_84 + buf143 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf140, buf143, 2304, 64, grid=grid(2304), stream=stream0) + buf148 = buf132; del buf132 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf148, buf141, primals_88, mul_56, div_10, 64, 768, grid=grid(64), stream=stream0) + del div_10 + del primals_88 + buf146 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf147 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf141, mul_56, buf146, buf147, 768, 64, grid=grid(768), stream=stream0) + del mul_56 + buf149 = reinterpret_tensor(buf124, (64, 3072), (3072, 1), 0); del buf124 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf148, (64, 768), (768, 1), 0), permute_201, out=buf149) + del permute_201 + buf150 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf148, (768, 64), (1, 768), 0), view_82, out=buf150) + del view_82 + buf151 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf148, buf151, 768, 64, grid=grid(768), stream=stream0) + buf152 = reinterpret_tensor(buf149, (1, 64, 3072), (196608, 3072, 1), 0); del buf149 # reuse + # Source Nodes: [add_26, add_27, mul_24, mul_25, mul_26, pow_7, tanh_6], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf152, addmm_26, 196608, grid=grid(196608), stream=stream0) + del addmm_26 + buf153 = buf141; del buf141 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf152, (64, 3072), (3072, 1), 0), permute_205, out=buf153) + del permute_205 + buf154 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf152, (3072, 64), (1, 3072), 0), view_80, out=buf154) + del view_80 + buf155 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf152, buf155, 3072, 64, grid=grid(3072), stream=stream0) + buf160 = buf148; del buf148 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf160, buf153, primals_82, mul_50, div_11, 64, 768, grid=grid(64), stream=stream0) + del div_11 + del primals_82 + buf158 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf159 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf153, mul_50, buf158, buf159, 768, 64, grid=grid(768), stream=stream0) + del mul_50 + buf161 = buf153; del buf153 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + 
extern_kernels.mm(reinterpret_tensor(buf160, (64, 768), (768, 1), 0), permute_209, out=buf161) + del permute_209 + buf162 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf160, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_71, (64, 768), (768, 1), 0), out=buf162) + buf163 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf160, buf163, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf164 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf161, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True) + del buf161 + del getitem_71 + del getitem_72 + del getitem_73 + del getitem_74 + del permute_49 + del permute_50 + del permute_51 + buf165 = buf164[0] + buf166 = buf164[1] + buf167 = buf164[2] + del buf164 + buf168 = buf140; del buf140 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf165, buf166, buf167, buf168, 147456, grid=grid(147456), stream=stream0) + del buf165 + del buf166 + buf169 = reinterpret_tensor(buf167, (64, 768), (768, 1), 0); del buf167 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf168, (64, 2304), (2304, 1), 0), permute_217, out=buf169) + del permute_217 + buf170 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf168, (2304, 64), (1, 2304), 0), view_72, out=buf170) + del view_72 + buf171 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf168, buf171, 2304, 64, grid=grid(2304), stream=stream0) + buf176 = buf160; del buf160 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf176, buf169, primals_76, mul_48, div_12, 64, 768, grid=grid(64), stream=stream0) + del div_12 + del primals_76 + buf174 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf175 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf169, mul_48, buf174, buf175, 768, 64, grid=grid(768), stream=stream0) + del mul_48 + buf177 = reinterpret_tensor(buf152, (64, 3072), (3072, 1), 0); del buf152 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf176, (64, 768), (768, 1), 0), permute_221, out=buf177) + del permute_221 + buf178 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf176, (768, 64), (1, 768), 0), view_70, out=buf178) + del view_70 + buf179 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf176, buf179, 768, 64, grid=grid(768), stream=stream0) + buf180 = reinterpret_tensor(buf177, (1, 64, 3072), (196608, 3072, 1), 0); del buf177 # reuse + # Source Nodes: [add_22, add_23, mul_20, mul_21, mul_22, pow_6, tanh_5], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, 
aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf180, addmm_22, 196608, grid=grid(196608), stream=stream0) + del addmm_22 + buf181 = buf169; del buf169 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf180, (64, 3072), (3072, 1), 0), permute_225, out=buf181) + del permute_225 + buf182 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf180, (3072, 64), (1, 3072), 0), view_68, out=buf182) + del view_68 + buf183 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf180, buf183, 3072, 64, grid=grid(3072), stream=stream0) + buf188 = buf176; del buf176 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf188, buf181, primals_70, mul_42, div_13, 64, 768, grid=grid(64), stream=stream0) + del div_13 + del primals_70 + buf186 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf187 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf181, mul_42, buf186, buf187, 768, 64, grid=grid(768), stream=stream0) + del mul_42 + buf189 = buf181; del buf181 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf188, (64, 768), (768, 1), 0), permute_229, out=buf189) + del permute_229 + buf190 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf188, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_60, (64, 768), (768, 1), 0), out=buf190) + buf191 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf188, buf191, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf192 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf189, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True) + del buf189 + del getitem_60 + del getitem_61 + del getitem_62 + del getitem_63 + del permute_41 + del permute_42 + del permute_43 + buf193 = buf192[0] + buf194 = buf192[1] + buf195 = buf192[2] + del buf192 + buf196 = buf168; del buf168 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf193, buf194, buf195, buf196, 147456, grid=grid(147456), stream=stream0) + del buf193 + del buf194 + buf197 = reinterpret_tensor(buf195, (64, 768), (768, 1), 0); del buf195 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf196, (64, 2304), (2304, 1), 0), permute_237, out=buf197) + del permute_237 + buf198 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf196, (2304, 64), (1, 2304), 0), view_60, out=buf198) + del view_60 + buf199 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf196, buf199, 2304, 64, grid=grid(2304), stream=stream0) + buf204 = buf188; del buf188 
# reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf204, buf197, primals_64, mul_40, div_14, 64, 768, grid=grid(64), stream=stream0) + del div_14 + del primals_64 + buf202 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf203 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf197, mul_40, buf202, buf203, 768, 64, grid=grid(768), stream=stream0) + del mul_40 + buf205 = reinterpret_tensor(buf180, (64, 3072), (3072, 1), 0); del buf180 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf204, (64, 768), (768, 1), 0), permute_241, out=buf205) + del permute_241 + buf206 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf204, (768, 64), (1, 768), 0), view_58, out=buf206) + del view_58 + buf207 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf204, buf207, 768, 64, grid=grid(768), stream=stream0) + buf208 = reinterpret_tensor(buf205, (1, 64, 3072), (196608, 3072, 1), 0); del buf205 # reuse + # Source Nodes: [add_18, add_19, mul_16, mul_17, mul_18, pow_5, tanh_4], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf208, addmm_18, 196608, grid=grid(196608), stream=stream0) + del addmm_18 + buf209 = buf197; del buf197 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf208, (64, 3072), (3072, 1), 0), permute_245, out=buf209) + del permute_245 + buf210 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf208, (3072, 64), (1, 3072), 0), view_56, out=buf210) + del view_56 + buf211 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf208, buf211, 3072, 64, grid=grid(3072), stream=stream0) + buf216 = buf204; del buf204 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf216, buf209, primals_58, mul_34, div_15, 64, 768, grid=grid(64), stream=stream0) + del div_15 + del primals_58 + buf214 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf215 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf209, mul_34, buf214, buf215, 768, 64, grid=grid(768), stream=stream0) + del mul_34 + buf217 = buf209; del buf209 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf216, (64, 768), (768, 1), 0), permute_249, out=buf217) + del permute_249 + buf218 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf216, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_49, (64, 768), (768, 1), 0), out=buf218) + buf219 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf216, buf219, 768, 64, grid=grid(768), stream=stream0) + # Source 
Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf220 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf217, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True) + del buf217 + del getitem_49 + del getitem_50 + del getitem_51 + del getitem_52 + del permute_33 + del permute_34 + del permute_35 + buf221 = buf220[0] + buf222 = buf220[1] + buf223 = buf220[2] + del buf220 + buf224 = buf196; del buf196 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf221, buf222, buf223, buf224, 147456, grid=grid(147456), stream=stream0) + del buf221 + del buf222 + buf225 = reinterpret_tensor(buf223, (64, 768), (768, 1), 0); del buf223 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf224, (64, 2304), (2304, 1), 0), permute_257, out=buf225) + del permute_257 + buf226 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf224, (2304, 64), (1, 2304), 0), view_48, out=buf226) + del view_48 + buf227 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf224, buf227, 2304, 64, grid=grid(2304), stream=stream0) + buf232 = buf216; del buf216 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf232, buf225, primals_52, mul_32, div_16, 64, 768, grid=grid(64), stream=stream0) + del div_16 + del primals_52 + buf230 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf231 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf225, mul_32, buf230, buf231, 768, 64, grid=grid(768), stream=stream0) + del mul_32 + buf233 = reinterpret_tensor(buf208, (64, 3072), (3072, 1), 0); del buf208 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf232, (64, 768), (768, 1), 0), permute_261, out=buf233) + del permute_261 + buf234 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf232, (768, 64), (1, 768), 0), view_46, out=buf234) + del view_46 + buf235 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf232, buf235, 768, 64, grid=grid(768), stream=stream0) + buf236 = reinterpret_tensor(buf233, (1, 64, 3072), (196608, 3072, 1), 0); del buf233 # reuse + # Source Nodes: [add_14, add_15, mul_12, mul_13, mul_14, pow_4, tanh_3], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf236, addmm_14, 196608, grid=grid(196608), stream=stream0) + del addmm_14 + buf237 = buf225; del buf225 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf236, (64, 3072), (3072, 1), 0), permute_265, out=buf237) + del permute_265 + buf238 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf236, (3072, 64), (1, 3072), 0), view_44, out=buf238) + del view_44 + 
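+ # This same schedule repeats for every transformer block of the backward pass:
+ # a persistent-reduction layer-norm backward, mm pairs for the MLP weight grads,
+ # the fused tanh-GELU backward kernel, SDPA backward via
+ # aten._scaled_dot_product_efficient_attention_backward, and a cat kernel that
+ # re-packs the separate dq/dk/dv grads into the fused (1, 64, 2304) qkv layout.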
buf239 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf236, buf239, 3072, 64, grid=grid(3072), stream=stream0) + buf244 = buf232; del buf232 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf244, buf237, primals_46, mul_26, div_17, 64, 768, grid=grid(64), stream=stream0) + del div_17 + del primals_46 + buf242 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf243 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf237, mul_26, buf242, buf243, 768, 64, grid=grid(768), stream=stream0) + del mul_26 + buf245 = buf237; del buf237 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf244, (64, 768), (768, 1), 0), permute_269, out=buf245) + del permute_269 + buf246 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf244, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_38, (64, 768), (768, 1), 0), out=buf246) + buf247 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf244, buf247, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf248 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf245, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True) + del buf245 + del getitem_38 + del getitem_39 + del getitem_40 + del getitem_41 + del permute_25 + del permute_26 + del permute_27 + buf249 = buf248[0] + buf250 = buf248[1] + buf251 = buf248[2] + del buf248 + buf252 = buf224; del buf224 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf249, buf250, buf251, buf252, 147456, grid=grid(147456), stream=stream0) + del buf249 + del buf250 + buf253 = reinterpret_tensor(buf251, (64, 768), (768, 1), 0); del buf251 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf252, (64, 2304), (2304, 1), 0), permute_277, out=buf253) + del permute_277 + buf254 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf252, (2304, 64), (1, 2304), 0), view_36, out=buf254) + del view_36 + buf255 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf252, buf255, 2304, 64, grid=grid(2304), stream=stream0) + buf260 = buf244; del buf244 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf260, buf253, primals_40, mul_24, div_18, 64, 768, grid=grid(64), stream=stream0) + del div_18 + del primals_40 + buf258 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf259 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf253, mul_24, buf258, buf259, 768, 64, grid=grid(768), stream=stream0) + 
del mul_24 + buf261 = reinterpret_tensor(buf236, (64, 3072), (3072, 1), 0); del buf236 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf260, (64, 768), (768, 1), 0), permute_281, out=buf261) + del permute_281 + buf262 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf260, (768, 64), (1, 768), 0), view_34, out=buf262) + del view_34 + buf263 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf260, buf263, 768, 64, grid=grid(768), stream=stream0) + buf264 = reinterpret_tensor(buf261, (1, 64, 3072), (196608, 3072, 1), 0); del buf261 # reuse + # Source Nodes: [add_10, add_11, mul_10, mul_8, mul_9, pow_3, tanh_2], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf264, addmm_10, 196608, grid=grid(196608), stream=stream0) + del addmm_10 + buf265 = buf253; del buf253 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf264, (64, 3072), (3072, 1), 0), permute_285, out=buf265) + del permute_285 + buf266 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf264, (3072, 64), (1, 3072), 0), view_32, out=buf266) + del view_32 + buf267 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf264, buf267, 3072, 64, grid=grid(3072), stream=stream0) + buf272 = buf260; del buf260 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf272, buf265, primals_34, mul_18, div_19, 64, 768, grid=grid(64), stream=stream0) + del div_19 + del primals_34 + buf270 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf271 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf265, mul_18, buf270, buf271, 768, 64, grid=grid(768), stream=stream0) + del mul_18 + buf273 = buf265; del buf265 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf272, (64, 768), (768, 1), 0), permute_289, out=buf273) + del permute_289 + buf274 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf272, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_27, (64, 768), (768, 1), 0), out=buf274) + buf275 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf272, buf275, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf276 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf273, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True) + del buf273 + del getitem_27 + del getitem_28 + del getitem_29 + del getitem_30 + del permute_17 + del permute_18 + del permute_19 + buf277 = buf276[0] + buf278 = buf276[1] + buf279 = buf276[2] + del buf276 + buf280 = 
buf252; del buf252 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf277, buf278, buf279, buf280, 147456, grid=grid(147456), stream=stream0) + del buf277 + del buf278 + buf281 = reinterpret_tensor(buf279, (64, 768), (768, 1), 0); del buf279 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf280, (64, 2304), (2304, 1), 0), permute_297, out=buf281) + del permute_297 + buf282 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf280, (2304, 64), (1, 2304), 0), view_24, out=buf282) + del view_24 + buf283 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf280, buf283, 2304, 64, grid=grid(2304), stream=stream0) + buf288 = buf272; del buf272 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf288, buf281, primals_28, mul_16, div_20, 64, 768, grid=grid(64), stream=stream0) + del div_20 + del primals_28 + buf286 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf287 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf281, mul_16, buf286, buf287, 768, 64, grid=grid(768), stream=stream0) + del mul_16 + buf289 = reinterpret_tensor(buf264, (64, 3072), (3072, 1), 0); del buf264 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf288, (64, 768), (768, 1), 0), permute_301, out=buf289) + del permute_301 + buf290 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf288, (768, 64), (1, 768), 0), view_22, out=buf290) + del view_22 + buf291 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf288, buf291, 768, 64, grid=grid(768), stream=stream0) + buf292 = reinterpret_tensor(buf289, (1, 64, 3072), (196608, 3072, 1), 0); del buf289 # reuse + # Source Nodes: [add_6, add_7, mul_4, mul_5, mul_6, pow_2, tanh_1], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf292, addmm_6, 196608, grid=grid(196608), stream=stream0) + del addmm_6 + buf293 = buf281; del buf281 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf292, (64, 3072), (3072, 1), 0), permute_305, out=buf293) + del permute_305 + buf294 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf292, (3072, 64), (1, 3072), 0), view_20, out=buf294) + del view_20 + buf295 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf292, buf295, 3072, 64, grid=grid(3072), stream=stream0) + buf300 = buf288; del buf288 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf300, buf293, primals_22, mul_10, div_21, 64, 768, grid=grid(64), stream=stream0) + del div_21 + del primals_22 + buf298 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf299 = empty_strided_cuda((768, 
), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf293, mul_10, buf298, buf299, 768, 64, grid=grid(768), stream=stream0) + del mul_10 + buf301 = buf293; del buf293 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf300, (64, 768), (768, 1), 0), permute_309, out=buf301) + del permute_309 + buf302 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf300, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_16, (64, 768), (768, 1), 0), out=buf302) + buf303 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf300, buf303, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf304 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf301, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True) + del buf301 + del getitem_16 + del getitem_17 + del getitem_18 + del getitem_19 + del permute_10 + del permute_11 + del permute_9 + buf305 = buf304[0] + buf306 = buf304[1] + buf307 = buf304[2] + del buf304 + buf308 = buf280; del buf280 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf305, buf306, buf307, buf308, 147456, grid=grid(147456), stream=stream0) + del buf305 + del buf306 + buf309 = reinterpret_tensor(buf307, (64, 768), (768, 1), 0); del buf307 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf308, (64, 2304), (2304, 1), 0), permute_317, out=buf309) + del permute_317 + buf310 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf308, (2304, 64), (1, 2304), 0), view_12, out=buf310) + del view_12 + buf311 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf308, buf311, 2304, 64, grid=grid(2304), stream=stream0) + buf316 = buf300; del buf300 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf316, buf309, primals_16, mul_8, div_22, 64, 768, grid=grid(64), stream=stream0) + del div_22 + del primals_16 + buf314 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf315 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf309, mul_8, buf314, buf315, 768, 64, grid=grid(768), stream=stream0) + del mul_8 + buf317 = reinterpret_tensor(buf292, (64, 3072), (3072, 1), 0); del buf292 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf316, (64, 768), (768, 1), 0), permute_321, out=buf317) + del permute_321 + buf318 = empty_strided_cuda((768, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf316, (768, 64), (1, 768), 0), view_10, out=buf318) + del view_10 + buf319 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] 
+ triton_per_fused_sum_4.run(buf316, buf319, 768, 64, grid=grid(768), stream=stream0) + buf320 = reinterpret_tensor(buf317, (1, 64, 3072), (196608, 3072, 1), 0); del buf317 # reuse + # Source Nodes: [add_2, add_3, mul, mul_1, mul_2, pow_1, tanh], Original ATen: [aten.add, aten.mul, aten.pow, aten.tanh, aten.tanh_backward] + triton_poi_fused_add_mul_pow_tanh_tanh_backward_5.run(buf320, addmm_2, 196608, grid=grid(196608), stream=stream0) + del addmm_2 + buf321 = buf309; del buf309 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf320, (64, 3072), (3072, 1), 0), permute_325, out=buf321) + del permute_325 + buf322 = empty_strided_cuda((3072, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf320, (3072, 64), (1, 3072), 0), view_8, out=buf322) + del view_8 + buf323 = empty_strided_cuda((1, 3072), (3072, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_6.run(buf320, buf323, 3072, 64, grid=grid(3072), stream=stream0) + del buf320 + buf328 = buf316; del buf316 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.native_layer_norm_backward] + triton_per_fused_add_native_layer_norm_backward_7.run(buf328, buf321, primals_10, mul_2, div_23, 64, 768, grid=grid(64), stream=stream0) + del div_23 + del primals_10 + buf326 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf327 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf321, mul_2, buf326, buf327, 768, 64, grid=grid(768), stream=stream0) + del mul_2 + buf329 = buf321; del buf321 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf328, (64, 768), (768, 1), 0), permute_329, out=buf329) + del permute_329 + buf330 = empty_strided_cuda((768, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf328, (768, 64), (1, 768), 0), reinterpret_tensor(getitem_5, (64, 768), (768, 1), 0), out=buf330) + buf331 = empty_strided_cuda((1, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_4.run(buf328, buf331, 768, 64, grid=grid(768), stream=stream0) + # Source Nodes: [], Original ATen: [aten._scaled_dot_product_efficient_attention_backward] + buf332 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(reinterpret_tensor(buf329, (1, 12, 64, 64), (49152, 64, 768, 1), 0), permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True) + del buf329 + del getitem_5 + del getitem_6 + del getitem_7 + del getitem_8 + del permute_1 + del permute_2 + del permute_3 + buf333 = buf332[0] + buf334 = buf332[1] + buf335 = buf332[2] + del buf332 + buf336 = buf308; del buf308 # reuse + # Source Nodes: [], Original ATen: [aten.cat] + triton_poi_fused_cat_8.run(buf333, buf334, buf335, buf336, 147456, grid=grid(147456), stream=stream0) + del buf333 + del buf334 + buf337 = reinterpret_tensor(buf335, (64, 768), (768, 1), 0); del buf335 # reuse + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf336, (64, 2304), (2304, 1), 0), permute_337, out=buf337) + del permute_337 + buf338 = empty_strided_cuda((2304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.mm] + extern_kernels.mm(reinterpret_tensor(buf336, 
(2304, 64), (1, 2304), 0), view, out=buf338) + del view + buf339 = empty_strided_cuda((1, 2304), (2304, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.sum] + triton_per_fused_sum_9.run(buf336, buf339, 2304, 64, grid=grid(2304), stream=stream0) + del buf336 + buf345 = empty_strided_cuda((1024, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + triton_poi_fused_embedding_dense_backward_10.run(buf345, 786432, grid=grid(786432), stream=stream0) + buf347 = empty_strided_cuda((50304, 768), (768, 1), torch.float32) + # Source Nodes: [], Original ATen: [aten.embedding_dense_backward] + triton_poi_fused_embedding_dense_backward_11.run(buf347, 38633472, grid=grid(38633472), stream=stream0) + buf344 = buf328; del buf328 # reuse + # Source Nodes: [], Original ATen: [aten.add, aten.embedding_dense_backward, aten.native_layer_norm_backward] + triton_per_fused_add_embedding_dense_backward_native_layer_norm_backward_12.run(buf344, buf337, primals_4, mul, div_24, unsqueeze, primals_1, buf345, buf347, 64, 768, grid=grid(64), stream=stream0) + del buf344 + del div_24 + del primals_1 + del primals_4 + del unsqueeze + buf342 = empty_strided_cuda((768, ), (1, ), torch.float32) + buf343 = empty_strided_cuda((768, ), (1, ), torch.float32) + # Source Nodes: [], Original ATen: [aten.native_layer_norm_backward] + triton_per_fused_native_layer_norm_backward_3.run(buf337, mul, buf342, buf343, 768, 64, grid=grid(768), stream=stream0) + del buf337 + del mul + buf349 = buf0; del buf0 # reuse + # Source Nodes: [], Original ATen: [aten.add] + triton_poi_fused_add_13.run(buf349, buf347, 38633472, grid=grid(38633472), stream=stream0) + del buf347 + return (None, buf349, buf345, buf342, buf343, buf338, reinterpret_tensor(buf339, (2304, ), (1, ), 0), buf330, reinterpret_tensor(buf331, (768, ), (1, ), 0), buf326, buf327, buf322, reinterpret_tensor(buf323, (3072, ), (1, ), 0), buf318, reinterpret_tensor(buf319, (768, ), (1, ), 0), buf314, buf315, buf310, reinterpret_tensor(buf311, (2304, ), (1, ), 0), buf302, reinterpret_tensor(buf303, (768, ), (1, ), 0), buf298, buf299, buf294, reinterpret_tensor(buf295, (3072, ), (1, ), 0), buf290, reinterpret_tensor(buf291, (768, ), (1, ), 0), buf286, buf287, buf282, reinterpret_tensor(buf283, (2304, ), (1, ), 0), buf274, reinterpret_tensor(buf275, (768, ), (1, ), 0), buf270, buf271, buf266, reinterpret_tensor(buf267, (3072, ), (1, ), 0), buf262, reinterpret_tensor(buf263, (768, ), (1, ), 0), buf258, buf259, buf254, reinterpret_tensor(buf255, (2304, ), (1, ), 0), buf246, reinterpret_tensor(buf247, (768, ), (1, ), 0), buf242, buf243, buf238, reinterpret_tensor(buf239, (3072, ), (1, ), 0), buf234, reinterpret_tensor(buf235, (768, ), (1, ), 0), buf230, buf231, buf226, reinterpret_tensor(buf227, (2304, ), (1, ), 0), buf218, reinterpret_tensor(buf219, (768, ), (1, ), 0), buf214, buf215, buf210, reinterpret_tensor(buf211, (3072, ), (1, ), 0), buf206, reinterpret_tensor(buf207, (768, ), (1, ), 0), buf202, buf203, buf198, reinterpret_tensor(buf199, (2304, ), (1, ), 0), buf190, reinterpret_tensor(buf191, (768, ), (1, ), 0), buf186, buf187, buf182, reinterpret_tensor(buf183, (3072, ), (1, ), 0), buf178, reinterpret_tensor(buf179, (768, ), (1, ), 0), buf174, buf175, buf170, reinterpret_tensor(buf171, (2304, ), (1, ), 0), buf162, reinterpret_tensor(buf163, (768, ), (1, ), 0), buf158, buf159, buf154, reinterpret_tensor(buf155, (3072, ), (1, ), 0), buf150, reinterpret_tensor(buf151, (768, ), (1, ), 0), buf146, buf147, buf142, 
reinterpret_tensor(buf143, (2304, ), (1, ), 0), buf134, reinterpret_tensor(buf135, (768, ), (1, ), 0), buf130, buf131, buf126, reinterpret_tensor(buf127, (3072, ), (1, ), 0), buf122, reinterpret_tensor(buf123, (768, ), (1, ), 0), buf118, buf119, buf114, reinterpret_tensor(buf115, (2304, ), (1, ), 0), buf106, reinterpret_tensor(buf107, (768, ), (1, ), 0), buf102, buf103, buf98, reinterpret_tensor(buf99, (3072, ), (1, ), 0), buf94, reinterpret_tensor(buf95, (768, ), (1, ), 0), buf90, buf91, buf86, reinterpret_tensor(buf87, (2304, ), (1, ), 0), buf78, reinterpret_tensor(buf79, (768, ), (1, ), 0), buf74, buf75, buf70, reinterpret_tensor(buf71, (3072, ), (1, ), 0), buf66, reinterpret_tensor(buf67, (768, ), (1, ), 0), buf62, buf63, buf58, reinterpret_tensor(buf59, (2304, ), (1, ), 0), buf50, reinterpret_tensor(buf51, (768, ), (1, ), 0), buf46, buf47, buf42, reinterpret_tensor(buf43, (3072, ), (1, ), 0), buf38, reinterpret_tensor(buf39, (768, ), (1, ), 0), buf34, buf35, buf30, reinterpret_tensor(buf31, (2304, ), (1, ), 0), buf22, reinterpret_tensor(buf23, (768, ), (1, ), 0), buf18, buf19, buf14, reinterpret_tensor(buf15, (3072, ), (1, ), 0), buf10, reinterpret_tensor(buf11, (768, ), (1, ), 0), buf7, buf8, ) + + + def benchmark_compiled_module(times=10, repeat=10): + from torch._dynamo.testing import rand_strided + from torch._inductor.utils import print_performance + primals_1 = rand_strided((1, 64), (64, 1), device='cuda:0', dtype=torch.int64) + primals_4 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_10 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_16 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_22 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_28 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_34 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_40 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_46 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_52 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_58 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_64 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_70 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_76 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_82 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_88 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_94 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_100 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_106 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_112 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_118 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_124 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_130 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_136 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_142 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + primals_148 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32) + unsqueeze = rand_strided((1, 
64), (64, 1), device='cuda:0', dtype=torch.int64) + mul = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_1 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_2 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_3 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_5 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_6 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_7 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_8 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_2 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_8 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_2 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_10 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_8 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_12 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_9 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_10 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_11 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_16 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_17 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_18 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_19 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_10 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_20 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_6 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_22 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_16 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_24 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_17 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_18 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_19 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_27 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_28 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_29 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_30 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_18 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_32 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_10 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_34 = rand_strided((64, 3072), (3072, 1), 
device='cuda:0', dtype=torch.float32) + mul_24 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_36 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_25 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_26 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_27 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_38 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_39 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_40 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_41 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_26 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_44 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_14 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_46 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_32 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_48 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_33 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_34 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_35 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_49 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_50 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_51 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_52 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_34 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_56 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_18 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_58 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_40 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_60 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_41 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_42 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_43 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_60 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_61 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_62 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_63 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_42 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_68 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_22 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_70 = rand_strided((64, 3072), (3072, 
1), device='cuda:0', dtype=torch.float32) + mul_48 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_72 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_49 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_50 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_51 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_71 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_72 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_73 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_74 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_50 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_80 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_26 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_82 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_56 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_84 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_57 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_58 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_59 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_82 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_83 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_84 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_85 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_58 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_92 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_30 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_94 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_64 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_96 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_65 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_66 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_67 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_93 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_94 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_95 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_96 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_66 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_104 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_34 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_106 = rand_strided((64, 3072), 
(3072, 1), device='cuda:0', dtype=torch.float32) + mul_72 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_108 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_73 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_74 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_75 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_104 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_105 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_106 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_107 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_74 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_116 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_38 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_118 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_80 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_120 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_81 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_82 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_83 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_115 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_116 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_117 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_118 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_82 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_128 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_42 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_130 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_88 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_132 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_89 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_90 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + permute_91 = rand_strided((1, 12, 64, 64), (147456, 64, 2304, 1), device='cuda:0', dtype=torch.float32) + getitem_126 = rand_strided((1, 12, 64, 64), (49152, 64, 768, 1), device='cuda:0', dtype=torch.float32) + getitem_127 = rand_strided((1, 12, 64), (768, 64, 1), device='cuda:0', dtype=torch.float32) + getitem_128 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + getitem_129 = rand_strided((), (), device='cuda:0', dtype=torch.int64) + mul_90 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + view_140 = rand_strided((64, 768), (768, 1), device='cuda:0', dtype=torch.float32) + addmm_46 = rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + view_142 = 
rand_strided((64, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + mul_96 = rand_strided((1, 64, 768), (49152, 768, 1), device='cuda:0', dtype=torch.float32) + full_default = rand_strided((1, ), (1, ), device='cuda:0', dtype=torch.int64) + view_144 = rand_strided((1, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_99 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_101 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_105 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_1 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_109 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_117 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_2 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_121 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_125 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_3 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_129 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_137 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_4 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_141 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_145 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_5 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_149 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_157 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_6 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_161 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_165 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_7 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_169 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_177 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_8 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_181 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_185 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_9 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_189 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_197 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_10 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_201 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_205 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_11 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_209 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_217 = rand_strided((2304, 768), 
(768, 1), device='cuda:0', dtype=torch.float32) + div_12 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_221 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_225 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_13 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_229 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_237 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_14 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_241 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_245 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_15 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_249 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_257 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_16 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_261 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_265 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_17 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_269 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_277 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_18 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_281 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_285 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_19 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_289 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_297 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_20 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_301 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_305 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_21 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_309 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_317 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_22 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_321 = rand_strided((768, 3072), (3072, 1), device='cuda:0', dtype=torch.float32) + permute_325 = rand_strided((3072, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_23 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + permute_329 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32) + permute_337 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32) + div_24 = rand_strided((1, 64, 1), (64, 1, 1), device='cuda:0', dtype=torch.float32) + tangents_1 = rand_strided((1, 1, 50304), (50304, 50304, 1), device='cuda:0', dtype=torch.float32) + fn = lambda: call([primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, 
primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:05.012000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cb330bc8adf17d5898bfa70fc4423e7d"} + { + "name": "code_gen", + "ts": 1722977765012266.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:05.012000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9a7b7431fbf8818663bc6d9b02ce0e2f"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977765012451.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:05.128000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": 
"13ac313518e4246cb8dc2903f2b97b4b"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977761862518.2, + "args": { + "key": "foijwxq2i7flux6r2ba5gws3rpialjqk5cmhfg54f7i2spz557vl", + "cache_state": "miss", + "components": [ + "[4hgegienmiaqunsqbxyycnrivovz4r63bypl5psmqilwotq5er6] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1):\n view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); tangents_1 = None\n permute_97 = torch.ops.aten.permute.default(view_146, [1, 0])\n mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None\n permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None\n mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = 
permute_99 = None\n view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None\n permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None\n full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None\n mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None\n mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768)\n sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)\n mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None\n sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None\n mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None\n sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None\n sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None\n mul_103 = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None\n mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None\n sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None\n sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None\n view_148 = torch.ops.aten.view.default(mul_103, [64, 768])\n mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None\n permute_102 = torch.ops.aten.permute.default(view_148, [1, 0])\n mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None\n permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None\n sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None\n view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None\n permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None\n view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0)\n mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None\n mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None\n sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None\n mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None\n mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None\n mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715)\n pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None\n mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None\n mul_112 = torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None\n add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None\n mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None\n add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None\n view_151 = 
torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None\n mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None\n permute_106 = torch.ops.aten.permute.default(view_151, [1, 0])\n mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None\n permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None\n sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None\n view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None\n permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None\n view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None\n mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None\n mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768)\n sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)\n mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None\n sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None\n mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None\n sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None\n sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None\n mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None\n mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None\n sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None\n sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None\n add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None\n view_154 = torch.ops.aten.view.default(add_101, [64, 768])\n mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None\n permute_110 = torch.ops.aten.permute.default(view_154, [1, 0])\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None\n permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None\n sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None\n view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None\n permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None\n view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None\n view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None\n permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None\n _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None\n getitem_134 = _scaled_dot_product_efficient_attention_backward[0]\n getitem_135 = _scaled_dot_product_efficient_attention_backward[1]\n getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None\n permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None\n view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None\n 
permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None\n view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None\n permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None\n view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None\n cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None\n view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None\n mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None\n permute_118 = torch.ops.aten.permute.default(view_161, [1, 0])\n mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None\n permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None\n sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None\n view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None\n permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None\n view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None\n mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None\n mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)\n sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)\n mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None\n sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None\n mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None\n sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None\n sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None\n mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None\n mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None\n sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None\n sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None\n add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None\n view_164 = torch.ops.aten.view.default(add_102, [64, 768])\n mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None\n permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])\n mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None\n permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None\n sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None\n view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None\n permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None\n view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None\n mul_86 = torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)\n mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None\n mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = 
None\n sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None\n mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None\n mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None\n mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)\n pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None\n mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None\n mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None\n add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None\n mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None\n add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None\n view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None\n mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None\n permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])\n mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None\n permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None\n sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None\n view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None\n permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None\n view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None\n mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None\n mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)\n sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)\n mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None\n sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None\n mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None\n sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None\n sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None\n mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None\n mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None\n sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None\n sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None\n add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None\n view_170 = torch.ops.aten.view.default(add_105, [64, 768])\n mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None\n permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None\n permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None\n sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None\n view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None\n permute_132 = torch.ops.aten.permute.default(permute_131, [1, 0]); permute_131 = None\n view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None\n view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None\n permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None\n _scaled_dot_product_efficient_attention_backward_1 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None\n getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]\n getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]\n getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None\n permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None\n view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None\n permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None\n view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None\n permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None\n view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None\n cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None\n view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None\n mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None\n permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])\n mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None\n permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None\n sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None\n view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None\n permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None\n view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None\n mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None\n mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)\n sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)\n mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None\n sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None\n mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None\n sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None\n sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None\n mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None\n mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None\n sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None\n sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None\n add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None\n view_180 = torch.ops.aten.view.default(add_106, [64, 768])\n mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None\n permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])\n mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None\n permute_143 = torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None\n sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None\n view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None\n permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None\n view_182 = 
torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)\n mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None\n mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None\n sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None\n mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None\n mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None\n mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)\n pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None\n mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None\n mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None\n add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None\n mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None\n add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None\n view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None\n mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None\n permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])\n mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None\n permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None\n sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None\n view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None\n permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None\n view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None\n mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None\n mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)\n sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)\n mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None\n sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None\n mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None\n sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None\n sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None\n mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None\n mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None\n sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None\n sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None\n add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None\n view_186 = torch.ops.aten.view.default(add_109, [64, 768])\n mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None\n permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); 
permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None\n permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None\n sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None\n view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None\n permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None\n view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None\n view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None\n permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None\n _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None\n getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]\n getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]\n getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None\n permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None\n view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None\n permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None\n view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None\n permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None\n view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None\n cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None\n view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None\n mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None\n permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])\n mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None\n permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None\n sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None\n view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None\n permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None\n view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None\n mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None\n mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)\n sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)\n mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None\n sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None\n mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None\n sub_47 = torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None\n sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None\n mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None\n mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None\n sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = 
None\n sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None\n add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None\n view_196 = torch.ops.aten.view.default(add_110, [64, 768])\n mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None\n permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])\n mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None\n permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None\n sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None\n view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None\n permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None\n view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)\n mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None\n mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None\n sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None\n mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None\n mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None\n mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)\n pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None\n mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None\n mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None\n add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None\n mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None\n add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None\n view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None\n mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None\n permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])\n mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None\n permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None\n sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None\n view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None\n permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None\n view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None\n mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None\n mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)\n sum_43 = torch.ops.aten.sum.dim_IntList(mul_184, [2], True)\n mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None\n sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None\n mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None\n sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None\n sub_52 = 
torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None\n mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None\n mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None\n sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None\n sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None\n add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None\n view_202 = torch.ops.aten.view.default(add_113, [64, 768])\n mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None\n permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None\n permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None\n sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None\n view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None\n permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None\n view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None\n view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None\n permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None\n _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None\n getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]\n getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]\n getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None\n permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None\n view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None\n permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None\n view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None\n permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None\n view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None\n cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None\n view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None\n mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None\n permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])\n mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None\n permute_179 = torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None\n sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None\n view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None\n permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None\n view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None\n 
mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None\n mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)\n sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)\n mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None\n sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None\n mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None\n sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None\n sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None\n mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None\n mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None\n sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None\n sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None\n add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None\n view_212 = torch.ops.aten.view.default(add_114, [64, 768])\n mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None\n permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])\n mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None\n permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None\n sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None\n view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None\n permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None\n view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)\n mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None\n mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None\n sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None\n mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None\n mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None\n mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)\n pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None\n mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None\n mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None\n add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None\n mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None\n add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None\n view_215 = torch.ops.aten.view.default(add_116, [64, 3072]); add_116 = None\n mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None\n permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])\n mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None\n permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None\n sum_54 = torch.ops.aten.sum.dim_IntList(view_215, 
[0], True); view_215 = None\n view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None\n permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None\n view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None\n mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None\n mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)\n sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)\n mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None\n sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None\n mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None\n sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None\n sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None\n mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None\n mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None\n sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None\n sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None\n add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None\n view_218 = torch.ops.aten.view.default(add_117, [64, 768])\n mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None\n permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None\n permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None\n sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None\n view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None\n permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None\n view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None\n view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None\n permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None\n _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None\n getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]\n getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]\n getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None\n permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None\n view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None\n permute_195 = torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None\n view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None\n permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None\n view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None\n cat_4 = torch.ops.aten.cat.default([view_223, 
view_224, view_222], 2); view_223 = view_224 = view_222 = None\n view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None\n mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None\n permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])\n mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None\n permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None\n sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None\n view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None\n permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None\n view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None\n mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None\n mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)\n sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)\n mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None\n sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None\n mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None\n sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None\n sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None\n mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None\n mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None\n sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None\n sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None\n add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None\n view_228 = torch.ops.aten.view.default(add_118, [64, 768])\n mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None\n permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])\n mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None\n permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None\n sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None\n view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None\n permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None\n view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)\n mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None\n mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None\n sub_63 = torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None\n mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None\n mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None\n mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)\n pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None\n mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); 
pow_18 = None\n mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None\n add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None\n mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None\n add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None\n view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None\n mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None\n permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])\n mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None\n permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None\n sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None\n view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None\n permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None\n view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None\n mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None\n mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)\n sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)\n mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None\n sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None\n mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None\n sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None\n sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None\n mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None\n mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None\n sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None\n sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None\n add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None\n view_234 = torch.ops.aten.view.default(add_121, [64, 768])\n mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None\n permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None\n permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None\n sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None\n view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None\n permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None\n view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None\n view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None\n permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None\n _scaled_dot_product_efficient_attention_backward_5 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None\n getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]\n getitem_155 = 
_scaled_dot_product_efficient_attention_backward_5[1]\n getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None\n permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None\n view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None\n permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None\n view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None\n permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None\n view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None\n cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None\n view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None\n mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None\n permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])\n mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None\n permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None\n sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None\n view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None\n permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None\n view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None\n mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None\n mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)\n sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)\n mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None\n sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None\n mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None\n sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None\n sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None\n mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None\n mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None\n sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None\n sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None\n add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None\n view_244 = torch.ops.aten.view.default(add_122, [64, 768])\n mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None\n permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])\n mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None\n permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None\n sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None\n view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None\n permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None\n view_246 = torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, 
mul_45); mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)\n mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None\n mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None\n sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None\n mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None\n mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None\n mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)\n pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None\n mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None\n mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None\n add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None\n mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None\n add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None\n view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None\n mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None\n permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])\n mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None\n permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None\n sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None\n view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None\n permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None\n view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None\n mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None\n mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)\n sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)\n mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None\n sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None\n mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None\n sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None\n sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None\n mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None\n mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None\n sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None\n sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None\n add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None\n view_250 = torch.ops.aten.view.default(add_125, [64, 768])\n mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None\n permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None\n permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None\n sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None\n view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None\n permute_232 = 
torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None\n view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None\n view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None\n permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None\n _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None\n getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]\n getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]\n getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None\n permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None\n view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None\n permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None\n view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None\n permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None\n view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None\n cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None\n view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None\n mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None\n permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])\n mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None\n permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None\n sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None\n view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None\n permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None\n view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None\n mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None\n mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)\n sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)\n mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None\n sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None\n mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None\n sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None\n sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None\n mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None\n mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None\n sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None\n sum_88 = torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None\n add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None\n view_260 = torch.ops.aten.view.default(add_126, [64, 768])\n mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None\n permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])\n mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = 
view_58 = None\n permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None\n sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None\n view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None\n permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None\n view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)\n mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None\n mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None\n sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None\n mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None\n mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None\n mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)\n pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None\n mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None\n mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None\n add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None\n mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None\n add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None\n view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None\n mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None\n permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])\n mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None\n permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None\n sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None\n view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None\n permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None\n view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None\n mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None\n mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)\n sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)\n mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None\n sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None\n mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None\n sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None\n sub_80 = torch.ops.aten.sub.Tensor(sub_79, mul_279); sub_79 = mul_279 = None\n mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None\n mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None\n sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None\n sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None\n add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None\n view_266 = 
torch.ops.aten.view.default(add_129, [64, 768])\n mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None\n permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None\n permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None\n sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None\n view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None\n permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None\n view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None\n view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None\n permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None\n _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None\n getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]\n getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]\n getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None\n permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None\n view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None\n permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None\n view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None\n permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None\n view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None\n cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None\n view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None\n mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None\n permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])\n mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None\n permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None\n sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None\n view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None\n permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None\n view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None\n mul_283 = torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None\n mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)\n sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)\n mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None\n sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None\n mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None\n sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 
= sum_97 = None\n sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None\n mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None\n mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None\n sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None\n sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None\n add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None\n view_276 = torch.ops.aten.view.default(add_130, [64, 768])\n mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None\n permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])\n mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None\n permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None\n sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None\n view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None\n permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None\n view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)\n mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None\n mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None\n sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None\n mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None\n mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None\n mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)\n pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None\n mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None\n mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None\n add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None\n mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None\n add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None\n view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None\n mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None\n permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])\n mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None\n permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None\n sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], True); view_279 = None\n view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None\n permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None\n view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None\n mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None\n mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)\n sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)\n 
mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None\n sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None\n mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None\n sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None\n sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None\n mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None\n mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None\n sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None\n sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None\n add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None\n view_282 = torch.ops.aten.view.default(add_133, [64, 768])\n mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None\n permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None\n permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None\n sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None\n view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None\n permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None\n view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None\n view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None\n permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None\n _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None\n getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]\n getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]\n getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None\n permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None\n view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None\n permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None\n view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None\n permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None\n view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None\n cat_8 = torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None\n view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None\n mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None\n permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])\n mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None\n permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None\n sum_108 
= torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None\n view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None\n permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None\n view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None\n mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None\n mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)\n sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)\n mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None\n sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None\n mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None\n sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None\n sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None\n mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None\n mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None\n sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None\n sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None\n add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None\n view_292 = torch.ops.aten.view.default(add_134, [64, 768])\n mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None\n permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])\n mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None\n permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None\n sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None\n view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None\n permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None\n view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)\n mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None\n mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None\n sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None\n mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None\n mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None\n mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)\n pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None\n mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None\n mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None\n add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None\n mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None\n add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None\n view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None\n mm_77 = 
torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None\n permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])\n mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None\n permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None\n sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None\n view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None\n permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None\n view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None\n mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None\n mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)\n sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)\n mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None\n sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None\n mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None\n sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None\n sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None\n mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None\n mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None\n sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None\n sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None\n add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None\n view_298 = torch.ops.aten.view.default(add_137, [64, 768])\n mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None\n permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None\n permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None\n sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None\n view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None\n permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None\n view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None\n view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None\n permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None\n _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None\n getitem_170 = _scaled_dot_product_efficient_attention_backward_9[0]\n getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]\n getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None\n permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None\n view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None\n permute_295 = 
torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None\n view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None\n permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None\n view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None\n cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None\n view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None\n mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None\n permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])\n mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None\n permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None\n sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None\n view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None\n permute_300 = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None\n view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None\n mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None\n mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768)\n sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)\n mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None\n sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None\n mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None\n sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None\n sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None\n mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None\n mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None\n sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None\n sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None\n add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None\n view_308 = torch.ops.aten.view.default(add_138, [64, 768])\n mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None\n permute_302 = torch.ops.aten.permute.default(view_308, [1, 0])\n mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None\n permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None\n sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None\n view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None\n permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None\n view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0)\n mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None\n mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None\n 
sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None\n mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None\n mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None\n mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715)\n pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None\n mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None\n mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None\n add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None\n mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None\n add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None\n view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None\n mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None\n permute_306 = torch.ops.aten.permute.default(view_311, [1, 0])\n mm_86 = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None\n permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None\n sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None\n view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None\n permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None\n view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None\n mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None\n mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768)\n sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)\n mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None\n sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None\n mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None\n sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None\n sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None\n mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None\n mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None\n sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None\n sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None\n add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None\n view_314 = torch.ops.aten.view.default(add_141, [64, 768])\n mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None\n permute_310 = torch.ops.aten.permute.default(view_314, [1, 0])\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None\n permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None\n sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 = None\n view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None\n permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None\n view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None\n view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None\n permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None\n _scaled_dot_product_efficient_attention_backward_10 
= torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None\n getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0]\n getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1]\n getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None\n permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None\n view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None\n permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None\n view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None\n permute_316 = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None\n view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None\n cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None\n view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None\n mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None\n permute_318 = torch.ops.aten.permute.default(view_321, [1, 0])\n mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None\n permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None\n sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None\n view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None\n permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None\n view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None\n mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None\n mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768)\n sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)\n mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None\n sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None\n mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None\n sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None\n sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None\n mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None\n mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None\n sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None\n sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None\n add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None\n view_324 = torch.ops.aten.view.default(add_142, [64, 768])\n mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None\n permute_322 = torch.ops.aten.permute.default(view_324, [1, 0])\n mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None\n permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None\n sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None\n view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None\n permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None\n view_326 = 
torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0)\n mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None\n mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None\n sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None\n mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None\n mul_362 = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None\n mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715)\n pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None\n mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None\n mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None\n add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None\n mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None\n add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None\n view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None\n mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None\n permute_326 = torch.ops.aten.permute.default(view_327, [1, 0])\n mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None\n permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None\n sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None\n view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None\n permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None\n view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None\n mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None\n mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768)\n sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)\n mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None\n sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None\n mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None\n sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None\n sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None\n mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None\n mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None\n sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None\n sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); view_329 = None\n add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None\n view_330 = torch.ops.aten.view.default(add_145, [64, 768])\n mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None\n permute_330 = torch.ops.aten.permute.default(view_330, [1, 0])\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = 
torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None\n permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None\n sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None\n view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None\n permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None\n view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None\n view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None\n permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None\n _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None\n getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0]\n getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1]\n getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None\n permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None\n view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None\n permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None\n view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None\n permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None\n view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None\n cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None\n view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None\n mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None\n permute_338 = torch.ops.aten.permute.default(view_337, [1, 0])\n mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None\n permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None\n sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None\n view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None\n permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None\n view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None\n mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None\n mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768)\n sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)\n mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None\n sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None\n mul_378 = torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None\n sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None\n sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None\n mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None\n mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None\n sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None\n sum_148 = torch.ops.aten.sum.dim_IntList(view_339, 
[0, 1]); view_339 = None\n add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None\n eq = torch.ops.aten.eq.Scalar(unsqueeze, -1)\n unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None\n full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None\n full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None\n eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1)\n unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None\n where_1 = torch.ops.aten.where.self(unsqueeze_2, full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None\n full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None\n add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None\n return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)\n \n# To see more debug info, please use `graph_module.print_readable()`", + "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[26]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[34]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[35]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[40]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[47]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[48]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[60]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[61]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[71]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[73]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[74]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[86]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[87]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[99]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[100]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[102]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[112]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[113]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[125]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[126]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[133]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[138]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[139]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[151]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[152]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] 
example_inputs[164]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[165]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[177]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[178]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aba44qxan7tyih7ljdxyqka53vkn25cmdzgth5cyl2s7qorx7vi] example_inputs[184]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[qitbyzr7emyctium3gjpb6gcr75vrxwd24qiyojnre7qqd7zo4f] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[u55blbbc73afkevwx6ofprgxxytl7dbrkgoal4z3b6od3qdlugs] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 1, 50304]), stride=(50304, 50304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_backward]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[y3e3yuxtssnww62nt5exdblxjs4qqfe6m45lbogy57sjgkkgd7s] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259]", + "[gqceoov337f2fxydib545wytq5n6f565gacorvs7dzatfibcnjq] fx_kwargs[user_visible_outputs]: {'add_147': None, '_unsafe_index_put': None, 'sum_147': None, 'sum_148': None, 'permute_340': None, 'view_338': None, 'permute_332': None, 'view_331': None, 'sum_141': None, 'sum_142': None, 'permute_328': None, 'view_328': None, 'permute_324': None, 'view_325': None, 'sum_135': None, 'sum_136': None, 'permute_320': None, 'view_322': None, 'permute_312': None, 'view_315': None, 'sum_129': None, 'sum_130': None, 'permute_308': None, 'view_312': None, 'permute_304': None, 'view_309': None, 'sum_123': None, 'sum_124': None, 'permute_300': None, 'view_306': None, 'permute_292': None, 'view_299': None, 'sum_117': None, 'sum_118': None, 'permute_288': None, 'view_296': None, 'permute_284': None, 'view_293': None, 'sum_111': None, 'sum_112': None, 'permute_280': None, 'view_290': None, 'permute_272': None, 'view_283': None, 'sum_105': None, 'sum_106': None, 'permute_268': None, 'view_280': None, 'permute_264': None, 'view_277': None, 'sum_99': None, 'sum_100': None, 'permute_260': None, 'view_274': None, 'permute_252': None, 'view_267': None, 'sum_93': None, 'sum_94': None, 'permute_248': None, 'view_264': None, 'permute_244': None, 'view_261': None, 'sum_87': None, 'sum_88': None, 'permute_240': None, 'view_258': None, 'permute_232': None, 'view_251': None, 'sum_81': None, 'sum_82': None, 'permute_228': None, 'view_248': None, 'permute_224': None, 'view_245': None, 'sum_75': None, 'sum_76': None, 'permute_220': None, 'view_242': None, 'permute_212': None, 'view_235': None, 'sum_69': None, 'sum_70': None, 'permute_208': None, 'view_232': None, 'permute_204': None, 'view_229': None, 'sum_63': None, 'sum_64': None, 'permute_200': None, 'view_226': None, 'permute_192': None, 'view_219': None, 'sum_57': None, 'sum_58': None, 'permute_188': None, 'view_216': None, 'permute_184': None, 'view_213': None, 'sum_51': None, 'sum_52': None, 'permute_180': None, 'view_210': None, 'permute_172': None, 'view_203': None, 'sum_45': None, 'sum_46': None, 'permute_168': None, 'view_200': None, 'permute_164': None, 'view_197': None, 'sum_39': None, 'sum_40': None, 'permute_160': None, 'view_194': None, 'permute_152': None, 'view_187': None, 'sum_33': None, 'sum_34': None, 'permute_148': None, 'view_184': None, 'permute_144': None, 'view_181': None, 'sum_27': None, 'sum_28': None, 'permute_140': None, 'view_178': None, 'permute_132': None, 'view_171': None, 'sum_21': None, 'sum_22': None, 'permute_128': None, 'view_168': None, 'permute_124': None, 'view_165': None, 'sum_15': None, 'sum_16': None, 'permute_120': None, 'view_162': None, 'permute_112': None, 'view_155': None, 'sum_9': None, 'sum_10': None, 'permute_108': None, 'view_152': None, 'permute_104': None, 'view_149': None, 'sum_3': None, 'sum_4': None}", + "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[0]: 260", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + 
"[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", 
+ "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] 
inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + 
"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:05.129000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1, "has_payload": "44fe4dd764a6fe29aa0b4a5ce2ca423f"} + {"key": "foijwxq2i7flux6r2ba5gws3rpialjqk5cmhfg54f7i2spz557vl", "cache_state": "miss", "components": ["[4hgegienmiaqunsqbxyycnrivovz4r63bypl5psmqilwotq5er6] gm: GraphModule()\n\n\n\ndef forward(self, primals_1, primals_4, primals_10, primals_16, primals_22, primals_28, primals_34, primals_40, primals_46, primals_52, primals_58, primals_64, primals_70, primals_76, primals_82, primals_88, primals_94, primals_100, primals_106, primals_112, primals_118, primals_124, primals_130, primals_136, primals_142, primals_148, unsqueeze, mul, view, permute_1, permute_2, permute_3, getitem_5, getitem_6, getitem_7, getitem_8, mul_2, view_8, addmm_2, view_10, mul_8, view_12, permute_9, permute_10, permute_11, getitem_16, getitem_17, getitem_18, getitem_19, mul_10, view_20, addmm_6, view_22, mul_16, view_24, permute_17, permute_18, permute_19, getitem_27, getitem_28, getitem_29, getitem_30, mul_18, view_32, addmm_10, view_34, mul_24, view_36, permute_25, permute_26, permute_27, getitem_38, getitem_39, getitem_40, getitem_41, mul_26, view_44, addmm_14, view_46, mul_32, view_48, permute_33, permute_34, permute_35, getitem_49, getitem_50, getitem_51, getitem_52, mul_34, view_56, addmm_18, view_58, mul_40, view_60, permute_41, permute_42, permute_43, getitem_60, getitem_61, getitem_62, getitem_63, mul_42, view_68, addmm_22, view_70, mul_48, view_72, permute_49, permute_50, permute_51, getitem_71, getitem_72, getitem_73, getitem_74, mul_50, view_80, addmm_26, view_82, mul_56, view_84, permute_57, permute_58, permute_59, getitem_82, getitem_83, getitem_84, getitem_85, mul_58, view_92, addmm_30, view_94, mul_64, view_96, permute_65, permute_66, permute_67, getitem_93, getitem_94, getitem_95, getitem_96, mul_66, view_104, addmm_34, view_106, mul_72, view_108, permute_73, permute_74, permute_75, getitem_104, getitem_105, getitem_106, getitem_107, mul_74, view_116, addmm_38, view_118, mul_80, view_120, permute_81, permute_82, permute_83, getitem_115, getitem_116, getitem_117, getitem_118, mul_82, view_128, addmm_42, view_130, mul_88, view_132, permute_89, permute_90, permute_91, getitem_126, getitem_127, getitem_128, getitem_129, mul_90, view_140, addmm_46, view_142, mul_96, full_default, view_144, permute_99, div, permute_101, permute_105, div_1, permute_109, permute_117, div_2, permute_121, permute_125, div_3, permute_129, permute_137, div_4, permute_141, permute_145, div_5, permute_149, permute_157, div_6, permute_161, permute_165, div_7, permute_169, permute_177, div_8, permute_181, permute_185, div_9, permute_189, permute_197, div_10, permute_201, permute_205, div_11, permute_209, permute_217, div_12, permute_221, permute_225, div_13, permute_229, permute_237, div_14, permute_241, permute_245, div_15, permute_249, permute_257, div_16, permute_261, permute_265, div_17, permute_269, permute_277, div_18, permute_281, permute_285, div_19, permute_289, permute_297, div_20, permute_301, permute_305, div_21, permute_309, permute_317, div_22, permute_321, permute_325, div_23, permute_329, permute_337, div_24, tangents_1):\n view_146 = torch.ops.aten.view.default(tangents_1, [1, 50304]); 
tangents_1 = None\n permute_97 = torch.ops.aten.permute.default(view_146, [1, 0])\n mm_1 = torch.ops.aten.mm.default(permute_97, view_144); permute_97 = view_144 = None\n permute_98 = torch.ops.aten.permute.default(mm_1, [1, 0]); mm_1 = None\n mm_2 = torch.ops.aten.mm.default(view_146, permute_99); view_146 = permute_99 = None\n view_147 = torch.ops.aten.view.default(mm_2, [1, 1, 768]); mm_2 = None\n permute_100 = torch.ops.aten.permute.default(permute_98, [1, 0]); permute_98 = None\n full_default_1 = torch.ops.aten.full.default([1, 64, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n index_put = torch.ops.aten.index_put.default(full_default_1, [None, full_default], view_147, True); full_default_1 = full_default = view_147 = None\n mul_99 = torch.ops.aten.mul.Tensor(index_put, primals_148); primals_148 = None\n mul_100 = torch.ops.aten.mul.Tensor(mul_99, 768)\n sum_1 = torch.ops.aten.sum.dim_IntList(mul_99, [2], True)\n mul_101 = torch.ops.aten.mul.Tensor(mul_99, mul_96); mul_99 = None\n sum_2 = torch.ops.aten.sum.dim_IntList(mul_101, [2], True); mul_101 = None\n mul_102 = torch.ops.aten.mul.Tensor(mul_96, sum_2); sum_2 = None\n sub_26 = torch.ops.aten.sub.Tensor(mul_100, sum_1); mul_100 = sum_1 = None\n sub_27 = torch.ops.aten.sub.Tensor(sub_26, mul_102); sub_26 = mul_102 = None\n mul_103 = torch.ops.aten.mul.Tensor(div, sub_27); div = sub_27 = None\n mul_104 = torch.ops.aten.mul.Tensor(index_put, mul_96); mul_96 = None\n sum_3 = torch.ops.aten.sum.dim_IntList(mul_104, [0, 1]); mul_104 = None\n sum_4 = torch.ops.aten.sum.dim_IntList(index_put, [0, 1]); index_put = None\n view_148 = torch.ops.aten.view.default(mul_103, [64, 768])\n mm_3 = torch.ops.aten.mm.default(view_148, permute_101); permute_101 = None\n permute_102 = torch.ops.aten.permute.default(view_148, [1, 0])\n mm_4 = torch.ops.aten.mm.default(permute_102, view_142); permute_102 = view_142 = None\n permute_103 = torch.ops.aten.permute.default(mm_4, [1, 0]); mm_4 = None\n sum_5 = torch.ops.aten.sum.dim_IntList(view_148, [0], True); view_148 = None\n view_149 = torch.ops.aten.view.default(sum_5, [768]); sum_5 = None\n permute_104 = torch.ops.aten.permute.default(permute_103, [1, 0]); permute_103 = None\n view_150 = torch.ops.aten.view.default(mm_3, [1, 64, 3072]); mm_3 = None\n view_141 = torch.ops.aten.view.default(addmm_46, [1, 64, 3072]); addmm_46 = None\n mul_92 = torch.ops.aten.mul.Tensor(view_141, 0.5)\n mul_105 = torch.ops.aten.mul.Tensor(view_150, mul_92); mul_92 = None\n pow_12 = torch.ops.aten.pow.Tensor_Scalar(view_141, 3.0)\n mul_93 = torch.ops.aten.mul.Tensor(pow_12, 0.044715); pow_12 = None\n add_94 = torch.ops.aten.add.Tensor(view_141, mul_93); mul_93 = None\n mul_94 = torch.ops.aten.mul.Tensor(add_94, 0.7978845608028654); add_94 = None\n tanh_11 = torch.ops.aten.tanh.default(mul_94); mul_94 = None\n add_95 = torch.ops.aten.add.Tensor(tanh_11, 1.0)\n mul_106 = torch.ops.aten.mul.Tensor(view_150, add_95); view_150 = add_95 = None\n mul_107 = torch.ops.aten.mul.Tensor(tanh_11, tanh_11); tanh_11 = None\n sub_28 = torch.ops.aten.sub.Tensor(1, mul_107); mul_107 = None\n mul_108 = torch.ops.aten.mul.Tensor(mul_105, sub_28); mul_105 = sub_28 = None\n mul_109 = torch.ops.aten.mul.Tensor(mul_108, 0.7978845608028654); mul_108 = None\n mul_110 = torch.ops.aten.mul.Tensor(mul_109, 0.044715)\n pow_13 = torch.ops.aten.pow.Tensor_Scalar(view_141, 2.0); view_141 = None\n mul_111 = torch.ops.aten.mul.Scalar(pow_13, 3.0); pow_13 = None\n mul_112 = 
torch.ops.aten.mul.Tensor(mul_110, mul_111); mul_110 = mul_111 = None\n add_99 = torch.ops.aten.add.Tensor(mul_109, mul_112); mul_109 = mul_112 = None\n mul_113 = torch.ops.aten.mul.Tensor(mul_106, 0.5); mul_106 = None\n add_100 = torch.ops.aten.add.Tensor(add_99, mul_113); add_99 = mul_113 = None\n view_151 = torch.ops.aten.view.default(add_100, [64, 3072]); add_100 = None\n mm_5 = torch.ops.aten.mm.default(view_151, permute_105); permute_105 = None\n permute_106 = torch.ops.aten.permute.default(view_151, [1, 0])\n mm_6 = torch.ops.aten.mm.default(permute_106, view_140); permute_106 = view_140 = None\n permute_107 = torch.ops.aten.permute.default(mm_6, [1, 0]); mm_6 = None\n sum_6 = torch.ops.aten.sum.dim_IntList(view_151, [0], True); view_151 = None\n view_152 = torch.ops.aten.view.default(sum_6, [3072]); sum_6 = None\n permute_108 = torch.ops.aten.permute.default(permute_107, [1, 0]); permute_107 = None\n view_153 = torch.ops.aten.view.default(mm_5, [1, 64, 768]); mm_5 = None\n mul_115 = torch.ops.aten.mul.Tensor(view_153, primals_142); primals_142 = None\n mul_116 = torch.ops.aten.mul.Tensor(mul_115, 768)\n sum_7 = torch.ops.aten.sum.dim_IntList(mul_115, [2], True)\n mul_117 = torch.ops.aten.mul.Tensor(mul_115, mul_90); mul_115 = None\n sum_8 = torch.ops.aten.sum.dim_IntList(mul_117, [2], True); mul_117 = None\n mul_118 = torch.ops.aten.mul.Tensor(mul_90, sum_8); sum_8 = None\n sub_30 = torch.ops.aten.sub.Tensor(mul_116, sum_7); mul_116 = sum_7 = None\n sub_31 = torch.ops.aten.sub.Tensor(sub_30, mul_118); sub_30 = mul_118 = None\n mul_119 = torch.ops.aten.mul.Tensor(div_1, sub_31); div_1 = sub_31 = None\n mul_120 = torch.ops.aten.mul.Tensor(view_153, mul_90); mul_90 = None\n sum_9 = torch.ops.aten.sum.dim_IntList(mul_120, [0, 1]); mul_120 = None\n sum_10 = torch.ops.aten.sum.dim_IntList(view_153, [0, 1]); view_153 = None\n add_101 = torch.ops.aten.add.Tensor(mul_103, mul_119); mul_103 = mul_119 = None\n view_154 = torch.ops.aten.view.default(add_101, [64, 768])\n mm_7 = torch.ops.aten.mm.default(view_154, permute_109); permute_109 = None\n permute_110 = torch.ops.aten.permute.default(view_154, [1, 0])\n permute_92 = torch.ops.aten.permute.default(getitem_126, [0, 2, 1, 3])\n view_137 = torch.ops.aten.view.default(permute_92, [1, 64, 768]); permute_92 = None\n view_138 = torch.ops.aten.view.default(view_137, [64, 768]); view_137 = None\n mm_8 = torch.ops.aten.mm.default(permute_110, view_138); permute_110 = view_138 = None\n permute_111 = torch.ops.aten.permute.default(mm_8, [1, 0]); mm_8 = None\n sum_11 = torch.ops.aten.sum.dim_IntList(view_154, [0], True); view_154 = None\n view_155 = torch.ops.aten.view.default(sum_11, [768]); sum_11 = None\n permute_112 = torch.ops.aten.permute.default(permute_111, [1, 0]); permute_111 = None\n view_156 = torch.ops.aten.view.default(mm_7, [1, 64, 768]); mm_7 = None\n view_157 = torch.ops.aten.view.default(view_156, [1, 64, 12, 64]); view_156 = None\n permute_113 = torch.ops.aten.permute.default(view_157, [0, 2, 1, 3]); view_157 = None\n _scaled_dot_product_efficient_attention_backward = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_113, permute_90, permute_89, permute_91, None, getitem_126, getitem_127, getitem_128, getitem_129, 0.0, [True, True, True, False], True); permute_113 = permute_90 = permute_89 = permute_91 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = None\n getitem_134 = _scaled_dot_product_efficient_attention_backward[0]\n getitem_135 = _scaled_dot_product_efficient_attention_backward[1]\n 
getitem_136 = _scaled_dot_product_efficient_attention_backward[2]; _scaled_dot_product_efficient_attention_backward = None\n permute_114 = torch.ops.aten.permute.default(getitem_136, [0, 2, 1, 3]); getitem_136 = None\n view_158 = torch.ops.aten.view.default(permute_114, [1, 64, 768]); permute_114 = None\n permute_115 = torch.ops.aten.permute.default(getitem_134, [0, 2, 1, 3]); getitem_134 = None\n view_159 = torch.ops.aten.view.default(permute_115, [1, 64, 768]); permute_115 = None\n permute_116 = torch.ops.aten.permute.default(getitem_135, [0, 2, 1, 3]); getitem_135 = None\n view_160 = torch.ops.aten.view.default(permute_116, [1, 64, 768]); permute_116 = None\n cat = torch.ops.aten.cat.default([view_159, view_160, view_158], 2); view_159 = view_160 = view_158 = None\n view_161 = torch.ops.aten.view.default(cat, [64, 2304]); cat = None\n mm_9 = torch.ops.aten.mm.default(view_161, permute_117); permute_117 = None\n permute_118 = torch.ops.aten.permute.default(view_161, [1, 0])\n mm_10 = torch.ops.aten.mm.default(permute_118, view_132); permute_118 = view_132 = None\n permute_119 = torch.ops.aten.permute.default(mm_10, [1, 0]); mm_10 = None\n sum_12 = torch.ops.aten.sum.dim_IntList(view_161, [0], True); view_161 = None\n view_162 = torch.ops.aten.view.default(sum_12, [2304]); sum_12 = None\n permute_120 = torch.ops.aten.permute.default(permute_119, [1, 0]); permute_119 = None\n view_163 = torch.ops.aten.view.default(mm_9, [1, 64, 768]); mm_9 = None\n mul_122 = torch.ops.aten.mul.Tensor(view_163, primals_136); primals_136 = None\n mul_123 = torch.ops.aten.mul.Tensor(mul_122, 768)\n sum_13 = torch.ops.aten.sum.dim_IntList(mul_122, [2], True)\n mul_124 = torch.ops.aten.mul.Tensor(mul_122, mul_88); mul_122 = None\n sum_14 = torch.ops.aten.sum.dim_IntList(mul_124, [2], True); mul_124 = None\n mul_125 = torch.ops.aten.mul.Tensor(mul_88, sum_14); sum_14 = None\n sub_33 = torch.ops.aten.sub.Tensor(mul_123, sum_13); mul_123 = sum_13 = None\n sub_34 = torch.ops.aten.sub.Tensor(sub_33, mul_125); sub_33 = mul_125 = None\n mul_126 = torch.ops.aten.mul.Tensor(div_2, sub_34); div_2 = sub_34 = None\n mul_127 = torch.ops.aten.mul.Tensor(view_163, mul_88); mul_88 = None\n sum_15 = torch.ops.aten.sum.dim_IntList(mul_127, [0, 1]); mul_127 = None\n sum_16 = torch.ops.aten.sum.dim_IntList(view_163, [0, 1]); view_163 = None\n add_102 = torch.ops.aten.add.Tensor(add_101, mul_126); add_101 = mul_126 = None\n view_164 = torch.ops.aten.view.default(add_102, [64, 768])\n mm_11 = torch.ops.aten.mm.default(view_164, permute_121); permute_121 = None\n permute_122 = torch.ops.aten.permute.default(view_164, [1, 0])\n mm_12 = torch.ops.aten.mm.default(permute_122, view_130); permute_122 = view_130 = None\n permute_123 = torch.ops.aten.permute.default(mm_12, [1, 0]); mm_12 = None\n sum_17 = torch.ops.aten.sum.dim_IntList(view_164, [0], True); view_164 = None\n view_165 = torch.ops.aten.view.default(sum_17, [768]); sum_17 = None\n permute_124 = torch.ops.aten.permute.default(permute_123, [1, 0]); permute_123 = None\n view_166 = torch.ops.aten.view.default(mm_11, [1, 64, 3072]); mm_11 = None\n view_129 = torch.ops.aten.view.default(addmm_42, [1, 64, 3072]); addmm_42 = None\n mul_84 = torch.ops.aten.mul.Tensor(view_129, 0.5)\n mul_128 = torch.ops.aten.mul.Tensor(view_166, mul_84); mul_84 = None\n pow_11 = torch.ops.aten.pow.Tensor_Scalar(view_129, 3.0)\n mul_85 = torch.ops.aten.mul.Tensor(pow_11, 0.044715); pow_11 = None\n add_86 = torch.ops.aten.add.Tensor(view_129, mul_85); mul_85 = None\n mul_86 = 
torch.ops.aten.mul.Tensor(add_86, 0.7978845608028654); add_86 = None\n tanh_10 = torch.ops.aten.tanh.default(mul_86); mul_86 = None\n add_87 = torch.ops.aten.add.Tensor(tanh_10, 1.0)\n mul_129 = torch.ops.aten.mul.Tensor(view_166, add_87); view_166 = add_87 = None\n mul_130 = torch.ops.aten.mul.Tensor(tanh_10, tanh_10); tanh_10 = None\n sub_35 = torch.ops.aten.sub.Tensor(1, mul_130); mul_130 = None\n mul_131 = torch.ops.aten.mul.Tensor(mul_128, sub_35); mul_128 = sub_35 = None\n mul_132 = torch.ops.aten.mul.Tensor(mul_131, 0.7978845608028654); mul_131 = None\n mul_133 = torch.ops.aten.mul.Tensor(mul_132, 0.044715)\n pow_14 = torch.ops.aten.pow.Tensor_Scalar(view_129, 2.0); view_129 = None\n mul_134 = torch.ops.aten.mul.Scalar(pow_14, 3.0); pow_14 = None\n mul_135 = torch.ops.aten.mul.Tensor(mul_133, mul_134); mul_133 = mul_134 = None\n add_103 = torch.ops.aten.add.Tensor(mul_132, mul_135); mul_132 = mul_135 = None\n mul_136 = torch.ops.aten.mul.Tensor(mul_129, 0.5); mul_129 = None\n add_104 = torch.ops.aten.add.Tensor(add_103, mul_136); add_103 = mul_136 = None\n view_167 = torch.ops.aten.view.default(add_104, [64, 3072]); add_104 = None\n mm_13 = torch.ops.aten.mm.default(view_167, permute_125); permute_125 = None\n permute_126 = torch.ops.aten.permute.default(view_167, [1, 0])\n mm_14 = torch.ops.aten.mm.default(permute_126, view_128); permute_126 = view_128 = None\n permute_127 = torch.ops.aten.permute.default(mm_14, [1, 0]); mm_14 = None\n sum_18 = torch.ops.aten.sum.dim_IntList(view_167, [0], True); view_167 = None\n view_168 = torch.ops.aten.view.default(sum_18, [3072]); sum_18 = None\n permute_128 = torch.ops.aten.permute.default(permute_127, [1, 0]); permute_127 = None\n view_169 = torch.ops.aten.view.default(mm_13, [1, 64, 768]); mm_13 = None\n mul_138 = torch.ops.aten.mul.Tensor(view_169, primals_130); primals_130 = None\n mul_139 = torch.ops.aten.mul.Tensor(mul_138, 768)\n sum_19 = torch.ops.aten.sum.dim_IntList(mul_138, [2], True)\n mul_140 = torch.ops.aten.mul.Tensor(mul_138, mul_82); mul_138 = None\n sum_20 = torch.ops.aten.sum.dim_IntList(mul_140, [2], True); mul_140 = None\n mul_141 = torch.ops.aten.mul.Tensor(mul_82, sum_20); sum_20 = None\n sub_37 = torch.ops.aten.sub.Tensor(mul_139, sum_19); mul_139 = sum_19 = None\n sub_38 = torch.ops.aten.sub.Tensor(sub_37, mul_141); sub_37 = mul_141 = None\n mul_142 = torch.ops.aten.mul.Tensor(div_3, sub_38); div_3 = sub_38 = None\n mul_143 = torch.ops.aten.mul.Tensor(view_169, mul_82); mul_82 = None\n sum_21 = torch.ops.aten.sum.dim_IntList(mul_143, [0, 1]); mul_143 = None\n sum_22 = torch.ops.aten.sum.dim_IntList(view_169, [0, 1]); view_169 = None\n add_105 = torch.ops.aten.add.Tensor(add_102, mul_142); add_102 = mul_142 = None\n view_170 = torch.ops.aten.view.default(add_105, [64, 768])\n mm_15 = torch.ops.aten.mm.default(view_170, permute_129); permute_129 = None\n permute_130 = torch.ops.aten.permute.default(view_170, [1, 0])\n permute_84 = torch.ops.aten.permute.default(getitem_115, [0, 2, 1, 3])\n view_125 = torch.ops.aten.view.default(permute_84, [1, 64, 768]); permute_84 = None\n view_126 = torch.ops.aten.view.default(view_125, [64, 768]); view_125 = None\n mm_16 = torch.ops.aten.mm.default(permute_130, view_126); permute_130 = view_126 = None\n permute_131 = torch.ops.aten.permute.default(mm_16, [1, 0]); mm_16 = None\n sum_23 = torch.ops.aten.sum.dim_IntList(view_170, [0], True); view_170 = None\n view_171 = torch.ops.aten.view.default(sum_23, [768]); sum_23 = None\n permute_132 = torch.ops.aten.permute.default(permute_131, 
[1, 0]); permute_131 = None\n view_172 = torch.ops.aten.view.default(mm_15, [1, 64, 768]); mm_15 = None\n view_173 = torch.ops.aten.view.default(view_172, [1, 64, 12, 64]); view_172 = None\n permute_133 = torch.ops.aten.permute.default(view_173, [0, 2, 1, 3]); view_173 = None\n _scaled_dot_product_efficient_attention_backward_1 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_133, permute_82, permute_81, permute_83, None, getitem_115, getitem_116, getitem_117, getitem_118, 0.0, [True, True, True, False], True); permute_133 = permute_82 = permute_81 = permute_83 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = None\n getitem_138 = _scaled_dot_product_efficient_attention_backward_1[0]\n getitem_139 = _scaled_dot_product_efficient_attention_backward_1[1]\n getitem_140 = _scaled_dot_product_efficient_attention_backward_1[2]; _scaled_dot_product_efficient_attention_backward_1 = None\n permute_134 = torch.ops.aten.permute.default(getitem_140, [0, 2, 1, 3]); getitem_140 = None\n view_174 = torch.ops.aten.view.default(permute_134, [1, 64, 768]); permute_134 = None\n permute_135 = torch.ops.aten.permute.default(getitem_138, [0, 2, 1, 3]); getitem_138 = None\n view_175 = torch.ops.aten.view.default(permute_135, [1, 64, 768]); permute_135 = None\n permute_136 = torch.ops.aten.permute.default(getitem_139, [0, 2, 1, 3]); getitem_139 = None\n view_176 = torch.ops.aten.view.default(permute_136, [1, 64, 768]); permute_136 = None\n cat_1 = torch.ops.aten.cat.default([view_175, view_176, view_174], 2); view_175 = view_176 = view_174 = None\n view_177 = torch.ops.aten.view.default(cat_1, [64, 2304]); cat_1 = None\n mm_17 = torch.ops.aten.mm.default(view_177, permute_137); permute_137 = None\n permute_138 = torch.ops.aten.permute.default(view_177, [1, 0])\n mm_18 = torch.ops.aten.mm.default(permute_138, view_120); permute_138 = view_120 = None\n permute_139 = torch.ops.aten.permute.default(mm_18, [1, 0]); mm_18 = None\n sum_24 = torch.ops.aten.sum.dim_IntList(view_177, [0], True); view_177 = None\n view_178 = torch.ops.aten.view.default(sum_24, [2304]); sum_24 = None\n permute_140 = torch.ops.aten.permute.default(permute_139, [1, 0]); permute_139 = None\n view_179 = torch.ops.aten.view.default(mm_17, [1, 64, 768]); mm_17 = None\n mul_145 = torch.ops.aten.mul.Tensor(view_179, primals_124); primals_124 = None\n mul_146 = torch.ops.aten.mul.Tensor(mul_145, 768)\n sum_25 = torch.ops.aten.sum.dim_IntList(mul_145, [2], True)\n mul_147 = torch.ops.aten.mul.Tensor(mul_145, mul_80); mul_145 = None\n sum_26 = torch.ops.aten.sum.dim_IntList(mul_147, [2], True); mul_147 = None\n mul_148 = torch.ops.aten.mul.Tensor(mul_80, sum_26); sum_26 = None\n sub_40 = torch.ops.aten.sub.Tensor(mul_146, sum_25); mul_146 = sum_25 = None\n sub_41 = torch.ops.aten.sub.Tensor(sub_40, mul_148); sub_40 = mul_148 = None\n mul_149 = torch.ops.aten.mul.Tensor(div_4, sub_41); div_4 = sub_41 = None\n mul_150 = torch.ops.aten.mul.Tensor(view_179, mul_80); mul_80 = None\n sum_27 = torch.ops.aten.sum.dim_IntList(mul_150, [0, 1]); mul_150 = None\n sum_28 = torch.ops.aten.sum.dim_IntList(view_179, [0, 1]); view_179 = None\n add_106 = torch.ops.aten.add.Tensor(add_105, mul_149); add_105 = mul_149 = None\n view_180 = torch.ops.aten.view.default(add_106, [64, 768])\n mm_19 = torch.ops.aten.mm.default(view_180, permute_141); permute_141 = None\n permute_142 = torch.ops.aten.permute.default(view_180, [1, 0])\n mm_20 = torch.ops.aten.mm.default(permute_142, view_118); permute_142 = view_118 = None\n permute_143 = 
torch.ops.aten.permute.default(mm_20, [1, 0]); mm_20 = None\n sum_29 = torch.ops.aten.sum.dim_IntList(view_180, [0], True); view_180 = None\n view_181 = torch.ops.aten.view.default(sum_29, [768]); sum_29 = None\n permute_144 = torch.ops.aten.permute.default(permute_143, [1, 0]); permute_143 = None\n view_182 = torch.ops.aten.view.default(mm_19, [1, 64, 3072]); mm_19 = None\n view_117 = torch.ops.aten.view.default(addmm_38, [1, 64, 3072]); addmm_38 = None\n mul_76 = torch.ops.aten.mul.Tensor(view_117, 0.5)\n mul_151 = torch.ops.aten.mul.Tensor(view_182, mul_76); mul_76 = None\n pow_10 = torch.ops.aten.pow.Tensor_Scalar(view_117, 3.0)\n mul_77 = torch.ops.aten.mul.Tensor(pow_10, 0.044715); pow_10 = None\n add_78 = torch.ops.aten.add.Tensor(view_117, mul_77); mul_77 = None\n mul_78 = torch.ops.aten.mul.Tensor(add_78, 0.7978845608028654); add_78 = None\n tanh_9 = torch.ops.aten.tanh.default(mul_78); mul_78 = None\n add_79 = torch.ops.aten.add.Tensor(tanh_9, 1.0)\n mul_152 = torch.ops.aten.mul.Tensor(view_182, add_79); view_182 = add_79 = None\n mul_153 = torch.ops.aten.mul.Tensor(tanh_9, tanh_9); tanh_9 = None\n sub_42 = torch.ops.aten.sub.Tensor(1, mul_153); mul_153 = None\n mul_154 = torch.ops.aten.mul.Tensor(mul_151, sub_42); mul_151 = sub_42 = None\n mul_155 = torch.ops.aten.mul.Tensor(mul_154, 0.7978845608028654); mul_154 = None\n mul_156 = torch.ops.aten.mul.Tensor(mul_155, 0.044715)\n pow_15 = torch.ops.aten.pow.Tensor_Scalar(view_117, 2.0); view_117 = None\n mul_157 = torch.ops.aten.mul.Scalar(pow_15, 3.0); pow_15 = None\n mul_158 = torch.ops.aten.mul.Tensor(mul_156, mul_157); mul_156 = mul_157 = None\n add_107 = torch.ops.aten.add.Tensor(mul_155, mul_158); mul_155 = mul_158 = None\n mul_159 = torch.ops.aten.mul.Tensor(mul_152, 0.5); mul_152 = None\n add_108 = torch.ops.aten.add.Tensor(add_107, mul_159); add_107 = mul_159 = None\n view_183 = torch.ops.aten.view.default(add_108, [64, 3072]); add_108 = None\n mm_21 = torch.ops.aten.mm.default(view_183, permute_145); permute_145 = None\n permute_146 = torch.ops.aten.permute.default(view_183, [1, 0])\n mm_22 = torch.ops.aten.mm.default(permute_146, view_116); permute_146 = view_116 = None\n permute_147 = torch.ops.aten.permute.default(mm_22, [1, 0]); mm_22 = None\n sum_30 = torch.ops.aten.sum.dim_IntList(view_183, [0], True); view_183 = None\n view_184 = torch.ops.aten.view.default(sum_30, [3072]); sum_30 = None\n permute_148 = torch.ops.aten.permute.default(permute_147, [1, 0]); permute_147 = None\n view_185 = torch.ops.aten.view.default(mm_21, [1, 64, 768]); mm_21 = None\n mul_161 = torch.ops.aten.mul.Tensor(view_185, primals_118); primals_118 = None\n mul_162 = torch.ops.aten.mul.Tensor(mul_161, 768)\n sum_31 = torch.ops.aten.sum.dim_IntList(mul_161, [2], True)\n mul_163 = torch.ops.aten.mul.Tensor(mul_161, mul_74); mul_161 = None\n sum_32 = torch.ops.aten.sum.dim_IntList(mul_163, [2], True); mul_163 = None\n mul_164 = torch.ops.aten.mul.Tensor(mul_74, sum_32); sum_32 = None\n sub_44 = torch.ops.aten.sub.Tensor(mul_162, sum_31); mul_162 = sum_31 = None\n sub_45 = torch.ops.aten.sub.Tensor(sub_44, mul_164); sub_44 = mul_164 = None\n mul_165 = torch.ops.aten.mul.Tensor(div_5, sub_45); div_5 = sub_45 = None\n mul_166 = torch.ops.aten.mul.Tensor(view_185, mul_74); mul_74 = None\n sum_33 = torch.ops.aten.sum.dim_IntList(mul_166, [0, 1]); mul_166 = None\n sum_34 = torch.ops.aten.sum.dim_IntList(view_185, [0, 1]); view_185 = None\n add_109 = torch.ops.aten.add.Tensor(add_106, mul_165); add_106 = mul_165 = None\n view_186 = 
torch.ops.aten.view.default(add_109, [64, 768])\n mm_23 = torch.ops.aten.mm.default(view_186, permute_149); permute_149 = None\n permute_150 = torch.ops.aten.permute.default(view_186, [1, 0])\n permute_76 = torch.ops.aten.permute.default(getitem_104, [0, 2, 1, 3])\n view_113 = torch.ops.aten.view.default(permute_76, [1, 64, 768]); permute_76 = None\n view_114 = torch.ops.aten.view.default(view_113, [64, 768]); view_113 = None\n mm_24 = torch.ops.aten.mm.default(permute_150, view_114); permute_150 = view_114 = None\n permute_151 = torch.ops.aten.permute.default(mm_24, [1, 0]); mm_24 = None\n sum_35 = torch.ops.aten.sum.dim_IntList(view_186, [0], True); view_186 = None\n view_187 = torch.ops.aten.view.default(sum_35, [768]); sum_35 = None\n permute_152 = torch.ops.aten.permute.default(permute_151, [1, 0]); permute_151 = None\n view_188 = torch.ops.aten.view.default(mm_23, [1, 64, 768]); mm_23 = None\n view_189 = torch.ops.aten.view.default(view_188, [1, 64, 12, 64]); view_188 = None\n permute_153 = torch.ops.aten.permute.default(view_189, [0, 2, 1, 3]); view_189 = None\n _scaled_dot_product_efficient_attention_backward_2 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_153, permute_74, permute_73, permute_75, None, getitem_104, getitem_105, getitem_106, getitem_107, 0.0, [True, True, True, False], True); permute_153 = permute_74 = permute_73 = permute_75 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = None\n getitem_142 = _scaled_dot_product_efficient_attention_backward_2[0]\n getitem_143 = _scaled_dot_product_efficient_attention_backward_2[1]\n getitem_144 = _scaled_dot_product_efficient_attention_backward_2[2]; _scaled_dot_product_efficient_attention_backward_2 = None\n permute_154 = torch.ops.aten.permute.default(getitem_144, [0, 2, 1, 3]); getitem_144 = None\n view_190 = torch.ops.aten.view.default(permute_154, [1, 64, 768]); permute_154 = None\n permute_155 = torch.ops.aten.permute.default(getitem_142, [0, 2, 1, 3]); getitem_142 = None\n view_191 = torch.ops.aten.view.default(permute_155, [1, 64, 768]); permute_155 = None\n permute_156 = torch.ops.aten.permute.default(getitem_143, [0, 2, 1, 3]); getitem_143 = None\n view_192 = torch.ops.aten.view.default(permute_156, [1, 64, 768]); permute_156 = None\n cat_2 = torch.ops.aten.cat.default([view_191, view_192, view_190], 2); view_191 = view_192 = view_190 = None\n view_193 = torch.ops.aten.view.default(cat_2, [64, 2304]); cat_2 = None\n mm_25 = torch.ops.aten.mm.default(view_193, permute_157); permute_157 = None\n permute_158 = torch.ops.aten.permute.default(view_193, [1, 0])\n mm_26 = torch.ops.aten.mm.default(permute_158, view_108); permute_158 = view_108 = None\n permute_159 = torch.ops.aten.permute.default(mm_26, [1, 0]); mm_26 = None\n sum_36 = torch.ops.aten.sum.dim_IntList(view_193, [0], True); view_193 = None\n view_194 = torch.ops.aten.view.default(sum_36, [2304]); sum_36 = None\n permute_160 = torch.ops.aten.permute.default(permute_159, [1, 0]); permute_159 = None\n view_195 = torch.ops.aten.view.default(mm_25, [1, 64, 768]); mm_25 = None\n mul_168 = torch.ops.aten.mul.Tensor(view_195, primals_112); primals_112 = None\n mul_169 = torch.ops.aten.mul.Tensor(mul_168, 768)\n sum_37 = torch.ops.aten.sum.dim_IntList(mul_168, [2], True)\n mul_170 = torch.ops.aten.mul.Tensor(mul_168, mul_72); mul_168 = None\n sum_38 = torch.ops.aten.sum.dim_IntList(mul_170, [2], True); mul_170 = None\n mul_171 = torch.ops.aten.mul.Tensor(mul_72, sum_38); sum_38 = None\n sub_47 = 
torch.ops.aten.sub.Tensor(mul_169, sum_37); mul_169 = sum_37 = None\n sub_48 = torch.ops.aten.sub.Tensor(sub_47, mul_171); sub_47 = mul_171 = None\n mul_172 = torch.ops.aten.mul.Tensor(div_6, sub_48); div_6 = sub_48 = None\n mul_173 = torch.ops.aten.mul.Tensor(view_195, mul_72); mul_72 = None\n sum_39 = torch.ops.aten.sum.dim_IntList(mul_173, [0, 1]); mul_173 = None\n sum_40 = torch.ops.aten.sum.dim_IntList(view_195, [0, 1]); view_195 = None\n add_110 = torch.ops.aten.add.Tensor(add_109, mul_172); add_109 = mul_172 = None\n view_196 = torch.ops.aten.view.default(add_110, [64, 768])\n mm_27 = torch.ops.aten.mm.default(view_196, permute_161); permute_161 = None\n permute_162 = torch.ops.aten.permute.default(view_196, [1, 0])\n mm_28 = torch.ops.aten.mm.default(permute_162, view_106); permute_162 = view_106 = None\n permute_163 = torch.ops.aten.permute.default(mm_28, [1, 0]); mm_28 = None\n sum_41 = torch.ops.aten.sum.dim_IntList(view_196, [0], True); view_196 = None\n view_197 = torch.ops.aten.view.default(sum_41, [768]); sum_41 = None\n permute_164 = torch.ops.aten.permute.default(permute_163, [1, 0]); permute_163 = None\n view_198 = torch.ops.aten.view.default(mm_27, [1, 64, 3072]); mm_27 = None\n view_105 = torch.ops.aten.view.default(addmm_34, [1, 64, 3072]); addmm_34 = None\n mul_68 = torch.ops.aten.mul.Tensor(view_105, 0.5)\n mul_174 = torch.ops.aten.mul.Tensor(view_198, mul_68); mul_68 = None\n pow_9 = torch.ops.aten.pow.Tensor_Scalar(view_105, 3.0)\n mul_69 = torch.ops.aten.mul.Tensor(pow_9, 0.044715); pow_9 = None\n add_70 = torch.ops.aten.add.Tensor(view_105, mul_69); mul_69 = None\n mul_70 = torch.ops.aten.mul.Tensor(add_70, 0.7978845608028654); add_70 = None\n tanh_8 = torch.ops.aten.tanh.default(mul_70); mul_70 = None\n add_71 = torch.ops.aten.add.Tensor(tanh_8, 1.0)\n mul_175 = torch.ops.aten.mul.Tensor(view_198, add_71); view_198 = add_71 = None\n mul_176 = torch.ops.aten.mul.Tensor(tanh_8, tanh_8); tanh_8 = None\n sub_49 = torch.ops.aten.sub.Tensor(1, mul_176); mul_176 = None\n mul_177 = torch.ops.aten.mul.Tensor(mul_174, sub_49); mul_174 = sub_49 = None\n mul_178 = torch.ops.aten.mul.Tensor(mul_177, 0.7978845608028654); mul_177 = None\n mul_179 = torch.ops.aten.mul.Tensor(mul_178, 0.044715)\n pow_16 = torch.ops.aten.pow.Tensor_Scalar(view_105, 2.0); view_105 = None\n mul_180 = torch.ops.aten.mul.Scalar(pow_16, 3.0); pow_16 = None\n mul_181 = torch.ops.aten.mul.Tensor(mul_179, mul_180); mul_179 = mul_180 = None\n add_111 = torch.ops.aten.add.Tensor(mul_178, mul_181); mul_178 = mul_181 = None\n mul_182 = torch.ops.aten.mul.Tensor(mul_175, 0.5); mul_175 = None\n add_112 = torch.ops.aten.add.Tensor(add_111, mul_182); add_111 = mul_182 = None\n view_199 = torch.ops.aten.view.default(add_112, [64, 3072]); add_112 = None\n mm_29 = torch.ops.aten.mm.default(view_199, permute_165); permute_165 = None\n permute_166 = torch.ops.aten.permute.default(view_199, [1, 0])\n mm_30 = torch.ops.aten.mm.default(permute_166, view_104); permute_166 = view_104 = None\n permute_167 = torch.ops.aten.permute.default(mm_30, [1, 0]); mm_30 = None\n sum_42 = torch.ops.aten.sum.dim_IntList(view_199, [0], True); view_199 = None\n view_200 = torch.ops.aten.view.default(sum_42, [3072]); sum_42 = None\n permute_168 = torch.ops.aten.permute.default(permute_167, [1, 0]); permute_167 = None\n view_201 = torch.ops.aten.view.default(mm_29, [1, 64, 768]); mm_29 = None\n mul_184 = torch.ops.aten.mul.Tensor(view_201, primals_106); primals_106 = None\n mul_185 = torch.ops.aten.mul.Tensor(mul_184, 768)\n sum_43 = 
torch.ops.aten.sum.dim_IntList(mul_184, [2], True)\n mul_186 = torch.ops.aten.mul.Tensor(mul_184, mul_66); mul_184 = None\n sum_44 = torch.ops.aten.sum.dim_IntList(mul_186, [2], True); mul_186 = None\n mul_187 = torch.ops.aten.mul.Tensor(mul_66, sum_44); sum_44 = None\n sub_51 = torch.ops.aten.sub.Tensor(mul_185, sum_43); mul_185 = sum_43 = None\n sub_52 = torch.ops.aten.sub.Tensor(sub_51, mul_187); sub_51 = mul_187 = None\n mul_188 = torch.ops.aten.mul.Tensor(div_7, sub_52); div_7 = sub_52 = None\n mul_189 = torch.ops.aten.mul.Tensor(view_201, mul_66); mul_66 = None\n sum_45 = torch.ops.aten.sum.dim_IntList(mul_189, [0, 1]); mul_189 = None\n sum_46 = torch.ops.aten.sum.dim_IntList(view_201, [0, 1]); view_201 = None\n add_113 = torch.ops.aten.add.Tensor(add_110, mul_188); add_110 = mul_188 = None\n view_202 = torch.ops.aten.view.default(add_113, [64, 768])\n mm_31 = torch.ops.aten.mm.default(view_202, permute_169); permute_169 = None\n permute_170 = torch.ops.aten.permute.default(view_202, [1, 0])\n permute_68 = torch.ops.aten.permute.default(getitem_93, [0, 2, 1, 3])\n view_101 = torch.ops.aten.view.default(permute_68, [1, 64, 768]); permute_68 = None\n view_102 = torch.ops.aten.view.default(view_101, [64, 768]); view_101 = None\n mm_32 = torch.ops.aten.mm.default(permute_170, view_102); permute_170 = view_102 = None\n permute_171 = torch.ops.aten.permute.default(mm_32, [1, 0]); mm_32 = None\n sum_47 = torch.ops.aten.sum.dim_IntList(view_202, [0], True); view_202 = None\n view_203 = torch.ops.aten.view.default(sum_47, [768]); sum_47 = None\n permute_172 = torch.ops.aten.permute.default(permute_171, [1, 0]); permute_171 = None\n view_204 = torch.ops.aten.view.default(mm_31, [1, 64, 768]); mm_31 = None\n view_205 = torch.ops.aten.view.default(view_204, [1, 64, 12, 64]); view_204 = None\n permute_173 = torch.ops.aten.permute.default(view_205, [0, 2, 1, 3]); view_205 = None\n _scaled_dot_product_efficient_attention_backward_3 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_173, permute_66, permute_65, permute_67, None, getitem_93, getitem_94, getitem_95, getitem_96, 0.0, [True, True, True, False], True); permute_173 = permute_66 = permute_65 = permute_67 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = None\n getitem_146 = _scaled_dot_product_efficient_attention_backward_3[0]\n getitem_147 = _scaled_dot_product_efficient_attention_backward_3[1]\n getitem_148 = _scaled_dot_product_efficient_attention_backward_3[2]; _scaled_dot_product_efficient_attention_backward_3 = None\n permute_174 = torch.ops.aten.permute.default(getitem_148, [0, 2, 1, 3]); getitem_148 = None\n view_206 = torch.ops.aten.view.default(permute_174, [1, 64, 768]); permute_174 = None\n permute_175 = torch.ops.aten.permute.default(getitem_146, [0, 2, 1, 3]); getitem_146 = None\n view_207 = torch.ops.aten.view.default(permute_175, [1, 64, 768]); permute_175 = None\n permute_176 = torch.ops.aten.permute.default(getitem_147, [0, 2, 1, 3]); getitem_147 = None\n view_208 = torch.ops.aten.view.default(permute_176, [1, 64, 768]); permute_176 = None\n cat_3 = torch.ops.aten.cat.default([view_207, view_208, view_206], 2); view_207 = view_208 = view_206 = None\n view_209 = torch.ops.aten.view.default(cat_3, [64, 2304]); cat_3 = None\n mm_33 = torch.ops.aten.mm.default(view_209, permute_177); permute_177 = None\n permute_178 = torch.ops.aten.permute.default(view_209, [1, 0])\n mm_34 = torch.ops.aten.mm.default(permute_178, view_96); permute_178 = view_96 = None\n permute_179 = 
torch.ops.aten.permute.default(mm_34, [1, 0]); mm_34 = None\n sum_48 = torch.ops.aten.sum.dim_IntList(view_209, [0], True); view_209 = None\n view_210 = torch.ops.aten.view.default(sum_48, [2304]); sum_48 = None\n permute_180 = torch.ops.aten.permute.default(permute_179, [1, 0]); permute_179 = None\n view_211 = torch.ops.aten.view.default(mm_33, [1, 64, 768]); mm_33 = None\n mul_191 = torch.ops.aten.mul.Tensor(view_211, primals_100); primals_100 = None\n mul_192 = torch.ops.aten.mul.Tensor(mul_191, 768)\n sum_49 = torch.ops.aten.sum.dim_IntList(mul_191, [2], True)\n mul_193 = torch.ops.aten.mul.Tensor(mul_191, mul_64); mul_191 = None\n sum_50 = torch.ops.aten.sum.dim_IntList(mul_193, [2], True); mul_193 = None\n mul_194 = torch.ops.aten.mul.Tensor(mul_64, sum_50); sum_50 = None\n sub_54 = torch.ops.aten.sub.Tensor(mul_192, sum_49); mul_192 = sum_49 = None\n sub_55 = torch.ops.aten.sub.Tensor(sub_54, mul_194); sub_54 = mul_194 = None\n mul_195 = torch.ops.aten.mul.Tensor(div_8, sub_55); div_8 = sub_55 = None\n mul_196 = torch.ops.aten.mul.Tensor(view_211, mul_64); mul_64 = None\n sum_51 = torch.ops.aten.sum.dim_IntList(mul_196, [0, 1]); mul_196 = None\n sum_52 = torch.ops.aten.sum.dim_IntList(view_211, [0, 1]); view_211 = None\n add_114 = torch.ops.aten.add.Tensor(add_113, mul_195); add_113 = mul_195 = None\n view_212 = torch.ops.aten.view.default(add_114, [64, 768])\n mm_35 = torch.ops.aten.mm.default(view_212, permute_181); permute_181 = None\n permute_182 = torch.ops.aten.permute.default(view_212, [1, 0])\n mm_36 = torch.ops.aten.mm.default(permute_182, view_94); permute_182 = view_94 = None\n permute_183 = torch.ops.aten.permute.default(mm_36, [1, 0]); mm_36 = None\n sum_53 = torch.ops.aten.sum.dim_IntList(view_212, [0], True); view_212 = None\n view_213 = torch.ops.aten.view.default(sum_53, [768]); sum_53 = None\n permute_184 = torch.ops.aten.permute.default(permute_183, [1, 0]); permute_183 = None\n view_214 = torch.ops.aten.view.default(mm_35, [1, 64, 3072]); mm_35 = None\n view_93 = torch.ops.aten.view.default(addmm_30, [1, 64, 3072]); addmm_30 = None\n mul_60 = torch.ops.aten.mul.Tensor(view_93, 0.5)\n mul_197 = torch.ops.aten.mul.Tensor(view_214, mul_60); mul_60 = None\n pow_8 = torch.ops.aten.pow.Tensor_Scalar(view_93, 3.0)\n mul_61 = torch.ops.aten.mul.Tensor(pow_8, 0.044715); pow_8 = None\n add_62 = torch.ops.aten.add.Tensor(view_93, mul_61); mul_61 = None\n mul_62 = torch.ops.aten.mul.Tensor(add_62, 0.7978845608028654); add_62 = None\n tanh_7 = torch.ops.aten.tanh.default(mul_62); mul_62 = None\n add_63 = torch.ops.aten.add.Tensor(tanh_7, 1.0)\n mul_198 = torch.ops.aten.mul.Tensor(view_214, add_63); view_214 = add_63 = None\n mul_199 = torch.ops.aten.mul.Tensor(tanh_7, tanh_7); tanh_7 = None\n sub_56 = torch.ops.aten.sub.Tensor(1, mul_199); mul_199 = None\n mul_200 = torch.ops.aten.mul.Tensor(mul_197, sub_56); mul_197 = sub_56 = None\n mul_201 = torch.ops.aten.mul.Tensor(mul_200, 0.7978845608028654); mul_200 = None\n mul_202 = torch.ops.aten.mul.Tensor(mul_201, 0.044715)\n pow_17 = torch.ops.aten.pow.Tensor_Scalar(view_93, 2.0); view_93 = None\n mul_203 = torch.ops.aten.mul.Scalar(pow_17, 3.0); pow_17 = None\n mul_204 = torch.ops.aten.mul.Tensor(mul_202, mul_203); mul_202 = mul_203 = None\n add_115 = torch.ops.aten.add.Tensor(mul_201, mul_204); mul_201 = mul_204 = None\n mul_205 = torch.ops.aten.mul.Tensor(mul_198, 0.5); mul_198 = None\n add_116 = torch.ops.aten.add.Tensor(add_115, mul_205); add_115 = mul_205 = None\n view_215 = torch.ops.aten.view.default(add_116, [64, 
3072]); add_116 = None\n mm_37 = torch.ops.aten.mm.default(view_215, permute_185); permute_185 = None\n permute_186 = torch.ops.aten.permute.default(view_215, [1, 0])\n mm_38 = torch.ops.aten.mm.default(permute_186, view_92); permute_186 = view_92 = None\n permute_187 = torch.ops.aten.permute.default(mm_38, [1, 0]); mm_38 = None\n sum_54 = torch.ops.aten.sum.dim_IntList(view_215, [0], True); view_215 = None\n view_216 = torch.ops.aten.view.default(sum_54, [3072]); sum_54 = None\n permute_188 = torch.ops.aten.permute.default(permute_187, [1, 0]); permute_187 = None\n view_217 = torch.ops.aten.view.default(mm_37, [1, 64, 768]); mm_37 = None\n mul_207 = torch.ops.aten.mul.Tensor(view_217, primals_94); primals_94 = None\n mul_208 = torch.ops.aten.mul.Tensor(mul_207, 768)\n sum_55 = torch.ops.aten.sum.dim_IntList(mul_207, [2], True)\n mul_209 = torch.ops.aten.mul.Tensor(mul_207, mul_58); mul_207 = None\n sum_56 = torch.ops.aten.sum.dim_IntList(mul_209, [2], True); mul_209 = None\n mul_210 = torch.ops.aten.mul.Tensor(mul_58, sum_56); sum_56 = None\n sub_58 = torch.ops.aten.sub.Tensor(mul_208, sum_55); mul_208 = sum_55 = None\n sub_59 = torch.ops.aten.sub.Tensor(sub_58, mul_210); sub_58 = mul_210 = None\n mul_211 = torch.ops.aten.mul.Tensor(div_9, sub_59); div_9 = sub_59 = None\n mul_212 = torch.ops.aten.mul.Tensor(view_217, mul_58); mul_58 = None\n sum_57 = torch.ops.aten.sum.dim_IntList(mul_212, [0, 1]); mul_212 = None\n sum_58 = torch.ops.aten.sum.dim_IntList(view_217, [0, 1]); view_217 = None\n add_117 = torch.ops.aten.add.Tensor(add_114, mul_211); add_114 = mul_211 = None\n view_218 = torch.ops.aten.view.default(add_117, [64, 768])\n mm_39 = torch.ops.aten.mm.default(view_218, permute_189); permute_189 = None\n permute_190 = torch.ops.aten.permute.default(view_218, [1, 0])\n permute_60 = torch.ops.aten.permute.default(getitem_82, [0, 2, 1, 3])\n view_89 = torch.ops.aten.view.default(permute_60, [1, 64, 768]); permute_60 = None\n view_90 = torch.ops.aten.view.default(view_89, [64, 768]); view_89 = None\n mm_40 = torch.ops.aten.mm.default(permute_190, view_90); permute_190 = view_90 = None\n permute_191 = torch.ops.aten.permute.default(mm_40, [1, 0]); mm_40 = None\n sum_59 = torch.ops.aten.sum.dim_IntList(view_218, [0], True); view_218 = None\n view_219 = torch.ops.aten.view.default(sum_59, [768]); sum_59 = None\n permute_192 = torch.ops.aten.permute.default(permute_191, [1, 0]); permute_191 = None\n view_220 = torch.ops.aten.view.default(mm_39, [1, 64, 768]); mm_39 = None\n view_221 = torch.ops.aten.view.default(view_220, [1, 64, 12, 64]); view_220 = None\n permute_193 = torch.ops.aten.permute.default(view_221, [0, 2, 1, 3]); view_221 = None\n _scaled_dot_product_efficient_attention_backward_4 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_193, permute_58, permute_57, permute_59, None, getitem_82, getitem_83, getitem_84, getitem_85, 0.0, [True, True, True, False], True); permute_193 = permute_58 = permute_57 = permute_59 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = None\n getitem_150 = _scaled_dot_product_efficient_attention_backward_4[0]\n getitem_151 = _scaled_dot_product_efficient_attention_backward_4[1]\n getitem_152 = _scaled_dot_product_efficient_attention_backward_4[2]; _scaled_dot_product_efficient_attention_backward_4 = None\n permute_194 = torch.ops.aten.permute.default(getitem_152, [0, 2, 1, 3]); getitem_152 = None\n view_222 = torch.ops.aten.view.default(permute_194, [1, 64, 768]); permute_194 = None\n permute_195 = 
torch.ops.aten.permute.default(getitem_150, [0, 2, 1, 3]); getitem_150 = None\n view_223 = torch.ops.aten.view.default(permute_195, [1, 64, 768]); permute_195 = None\n permute_196 = torch.ops.aten.permute.default(getitem_151, [0, 2, 1, 3]); getitem_151 = None\n view_224 = torch.ops.aten.view.default(permute_196, [1, 64, 768]); permute_196 = None\n cat_4 = torch.ops.aten.cat.default([view_223, view_224, view_222], 2); view_223 = view_224 = view_222 = None\n view_225 = torch.ops.aten.view.default(cat_4, [64, 2304]); cat_4 = None\n mm_41 = torch.ops.aten.mm.default(view_225, permute_197); permute_197 = None\n permute_198 = torch.ops.aten.permute.default(view_225, [1, 0])\n mm_42 = torch.ops.aten.mm.default(permute_198, view_84); permute_198 = view_84 = None\n permute_199 = torch.ops.aten.permute.default(mm_42, [1, 0]); mm_42 = None\n sum_60 = torch.ops.aten.sum.dim_IntList(view_225, [0], True); view_225 = None\n view_226 = torch.ops.aten.view.default(sum_60, [2304]); sum_60 = None\n permute_200 = torch.ops.aten.permute.default(permute_199, [1, 0]); permute_199 = None\n view_227 = torch.ops.aten.view.default(mm_41, [1, 64, 768]); mm_41 = None\n mul_214 = torch.ops.aten.mul.Tensor(view_227, primals_88); primals_88 = None\n mul_215 = torch.ops.aten.mul.Tensor(mul_214, 768)\n sum_61 = torch.ops.aten.sum.dim_IntList(mul_214, [2], True)\n mul_216 = torch.ops.aten.mul.Tensor(mul_214, mul_56); mul_214 = None\n sum_62 = torch.ops.aten.sum.dim_IntList(mul_216, [2], True); mul_216 = None\n mul_217 = torch.ops.aten.mul.Tensor(mul_56, sum_62); sum_62 = None\n sub_61 = torch.ops.aten.sub.Tensor(mul_215, sum_61); mul_215 = sum_61 = None\n sub_62 = torch.ops.aten.sub.Tensor(sub_61, mul_217); sub_61 = mul_217 = None\n mul_218 = torch.ops.aten.mul.Tensor(div_10, sub_62); div_10 = sub_62 = None\n mul_219 = torch.ops.aten.mul.Tensor(view_227, mul_56); mul_56 = None\n sum_63 = torch.ops.aten.sum.dim_IntList(mul_219, [0, 1]); mul_219 = None\n sum_64 = torch.ops.aten.sum.dim_IntList(view_227, [0, 1]); view_227 = None\n add_118 = torch.ops.aten.add.Tensor(add_117, mul_218); add_117 = mul_218 = None\n view_228 = torch.ops.aten.view.default(add_118, [64, 768])\n mm_43 = torch.ops.aten.mm.default(view_228, permute_201); permute_201 = None\n permute_202 = torch.ops.aten.permute.default(view_228, [1, 0])\n mm_44 = torch.ops.aten.mm.default(permute_202, view_82); permute_202 = view_82 = None\n permute_203 = torch.ops.aten.permute.default(mm_44, [1, 0]); mm_44 = None\n sum_65 = torch.ops.aten.sum.dim_IntList(view_228, [0], True); view_228 = None\n view_229 = torch.ops.aten.view.default(sum_65, [768]); sum_65 = None\n permute_204 = torch.ops.aten.permute.default(permute_203, [1, 0]); permute_203 = None\n view_230 = torch.ops.aten.view.default(mm_43, [1, 64, 3072]); mm_43 = None\n view_81 = torch.ops.aten.view.default(addmm_26, [1, 64, 3072]); addmm_26 = None\n mul_52 = torch.ops.aten.mul.Tensor(view_81, 0.5)\n mul_220 = torch.ops.aten.mul.Tensor(view_230, mul_52); mul_52 = None\n pow_7 = torch.ops.aten.pow.Tensor_Scalar(view_81, 3.0)\n mul_53 = torch.ops.aten.mul.Tensor(pow_7, 0.044715); pow_7 = None\n add_54 = torch.ops.aten.add.Tensor(view_81, mul_53); mul_53 = None\n mul_54 = torch.ops.aten.mul.Tensor(add_54, 0.7978845608028654); add_54 = None\n tanh_6 = torch.ops.aten.tanh.default(mul_54); mul_54 = None\n add_55 = torch.ops.aten.add.Tensor(tanh_6, 1.0)\n mul_221 = torch.ops.aten.mul.Tensor(view_230, add_55); view_230 = add_55 = None\n mul_222 = torch.ops.aten.mul.Tensor(tanh_6, tanh_6); tanh_6 = None\n sub_63 = 
torch.ops.aten.sub.Tensor(1, mul_222); mul_222 = None\n mul_223 = torch.ops.aten.mul.Tensor(mul_220, sub_63); mul_220 = sub_63 = None\n mul_224 = torch.ops.aten.mul.Tensor(mul_223, 0.7978845608028654); mul_223 = None\n mul_225 = torch.ops.aten.mul.Tensor(mul_224, 0.044715)\n pow_18 = torch.ops.aten.pow.Tensor_Scalar(view_81, 2.0); view_81 = None\n mul_226 = torch.ops.aten.mul.Scalar(pow_18, 3.0); pow_18 = None\n mul_227 = torch.ops.aten.mul.Tensor(mul_225, mul_226); mul_225 = mul_226 = None\n add_119 = torch.ops.aten.add.Tensor(mul_224, mul_227); mul_224 = mul_227 = None\n mul_228 = torch.ops.aten.mul.Tensor(mul_221, 0.5); mul_221 = None\n add_120 = torch.ops.aten.add.Tensor(add_119, mul_228); add_119 = mul_228 = None\n view_231 = torch.ops.aten.view.default(add_120, [64, 3072]); add_120 = None\n mm_45 = torch.ops.aten.mm.default(view_231, permute_205); permute_205 = None\n permute_206 = torch.ops.aten.permute.default(view_231, [1, 0])\n mm_46 = torch.ops.aten.mm.default(permute_206, view_80); permute_206 = view_80 = None\n permute_207 = torch.ops.aten.permute.default(mm_46, [1, 0]); mm_46 = None\n sum_66 = torch.ops.aten.sum.dim_IntList(view_231, [0], True); view_231 = None\n view_232 = torch.ops.aten.view.default(sum_66, [3072]); sum_66 = None\n permute_208 = torch.ops.aten.permute.default(permute_207, [1, 0]); permute_207 = None\n view_233 = torch.ops.aten.view.default(mm_45, [1, 64, 768]); mm_45 = None\n mul_230 = torch.ops.aten.mul.Tensor(view_233, primals_82); primals_82 = None\n mul_231 = torch.ops.aten.mul.Tensor(mul_230, 768)\n sum_67 = torch.ops.aten.sum.dim_IntList(mul_230, [2], True)\n mul_232 = torch.ops.aten.mul.Tensor(mul_230, mul_50); mul_230 = None\n sum_68 = torch.ops.aten.sum.dim_IntList(mul_232, [2], True); mul_232 = None\n mul_233 = torch.ops.aten.mul.Tensor(mul_50, sum_68); sum_68 = None\n sub_65 = torch.ops.aten.sub.Tensor(mul_231, sum_67); mul_231 = sum_67 = None\n sub_66 = torch.ops.aten.sub.Tensor(sub_65, mul_233); sub_65 = mul_233 = None\n mul_234 = torch.ops.aten.mul.Tensor(div_11, sub_66); div_11 = sub_66 = None\n mul_235 = torch.ops.aten.mul.Tensor(view_233, mul_50); mul_50 = None\n sum_69 = torch.ops.aten.sum.dim_IntList(mul_235, [0, 1]); mul_235 = None\n sum_70 = torch.ops.aten.sum.dim_IntList(view_233, [0, 1]); view_233 = None\n add_121 = torch.ops.aten.add.Tensor(add_118, mul_234); add_118 = mul_234 = None\n view_234 = torch.ops.aten.view.default(add_121, [64, 768])\n mm_47 = torch.ops.aten.mm.default(view_234, permute_209); permute_209 = None\n permute_210 = torch.ops.aten.permute.default(view_234, [1, 0])\n permute_52 = torch.ops.aten.permute.default(getitem_71, [0, 2, 1, 3])\n view_77 = torch.ops.aten.view.default(permute_52, [1, 64, 768]); permute_52 = None\n view_78 = torch.ops.aten.view.default(view_77, [64, 768]); view_77 = None\n mm_48 = torch.ops.aten.mm.default(permute_210, view_78); permute_210 = view_78 = None\n permute_211 = torch.ops.aten.permute.default(mm_48, [1, 0]); mm_48 = None\n sum_71 = torch.ops.aten.sum.dim_IntList(view_234, [0], True); view_234 = None\n view_235 = torch.ops.aten.view.default(sum_71, [768]); sum_71 = None\n permute_212 = torch.ops.aten.permute.default(permute_211, [1, 0]); permute_211 = None\n view_236 = torch.ops.aten.view.default(mm_47, [1, 64, 768]); mm_47 = None\n view_237 = torch.ops.aten.view.default(view_236, [1, 64, 12, 64]); view_236 = None\n permute_213 = torch.ops.aten.permute.default(view_237, [0, 2, 1, 3]); view_237 = None\n _scaled_dot_product_efficient_attention_backward_5 = 
torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_213, permute_50, permute_49, permute_51, None, getitem_71, getitem_72, getitem_73, getitem_74, 0.0, [True, True, True, False], True); permute_213 = permute_50 = permute_49 = permute_51 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = None\n getitem_154 = _scaled_dot_product_efficient_attention_backward_5[0]\n getitem_155 = _scaled_dot_product_efficient_attention_backward_5[1]\n getitem_156 = _scaled_dot_product_efficient_attention_backward_5[2]; _scaled_dot_product_efficient_attention_backward_5 = None\n permute_214 = torch.ops.aten.permute.default(getitem_156, [0, 2, 1, 3]); getitem_156 = None\n view_238 = torch.ops.aten.view.default(permute_214, [1, 64, 768]); permute_214 = None\n permute_215 = torch.ops.aten.permute.default(getitem_154, [0, 2, 1, 3]); getitem_154 = None\n view_239 = torch.ops.aten.view.default(permute_215, [1, 64, 768]); permute_215 = None\n permute_216 = torch.ops.aten.permute.default(getitem_155, [0, 2, 1, 3]); getitem_155 = None\n view_240 = torch.ops.aten.view.default(permute_216, [1, 64, 768]); permute_216 = None\n cat_5 = torch.ops.aten.cat.default([view_239, view_240, view_238], 2); view_239 = view_240 = view_238 = None\n view_241 = torch.ops.aten.view.default(cat_5, [64, 2304]); cat_5 = None\n mm_49 = torch.ops.aten.mm.default(view_241, permute_217); permute_217 = None\n permute_218 = torch.ops.aten.permute.default(view_241, [1, 0])\n mm_50 = torch.ops.aten.mm.default(permute_218, view_72); permute_218 = view_72 = None\n permute_219 = torch.ops.aten.permute.default(mm_50, [1, 0]); mm_50 = None\n sum_72 = torch.ops.aten.sum.dim_IntList(view_241, [0], True); view_241 = None\n view_242 = torch.ops.aten.view.default(sum_72, [2304]); sum_72 = None\n permute_220 = torch.ops.aten.permute.default(permute_219, [1, 0]); permute_219 = None\n view_243 = torch.ops.aten.view.default(mm_49, [1, 64, 768]); mm_49 = None\n mul_237 = torch.ops.aten.mul.Tensor(view_243, primals_76); primals_76 = None\n mul_238 = torch.ops.aten.mul.Tensor(mul_237, 768)\n sum_73 = torch.ops.aten.sum.dim_IntList(mul_237, [2], True)\n mul_239 = torch.ops.aten.mul.Tensor(mul_237, mul_48); mul_237 = None\n sum_74 = torch.ops.aten.sum.dim_IntList(mul_239, [2], True); mul_239 = None\n mul_240 = torch.ops.aten.mul.Tensor(mul_48, sum_74); sum_74 = None\n sub_68 = torch.ops.aten.sub.Tensor(mul_238, sum_73); mul_238 = sum_73 = None\n sub_69 = torch.ops.aten.sub.Tensor(sub_68, mul_240); sub_68 = mul_240 = None\n mul_241 = torch.ops.aten.mul.Tensor(div_12, sub_69); div_12 = sub_69 = None\n mul_242 = torch.ops.aten.mul.Tensor(view_243, mul_48); mul_48 = None\n sum_75 = torch.ops.aten.sum.dim_IntList(mul_242, [0, 1]); mul_242 = None\n sum_76 = torch.ops.aten.sum.dim_IntList(view_243, [0, 1]); view_243 = None\n add_122 = torch.ops.aten.add.Tensor(add_121, mul_241); add_121 = mul_241 = None\n view_244 = torch.ops.aten.view.default(add_122, [64, 768])\n mm_51 = torch.ops.aten.mm.default(view_244, permute_221); permute_221 = None\n permute_222 = torch.ops.aten.permute.default(view_244, [1, 0])\n mm_52 = torch.ops.aten.mm.default(permute_222, view_70); permute_222 = view_70 = None\n permute_223 = torch.ops.aten.permute.default(mm_52, [1, 0]); mm_52 = None\n sum_77 = torch.ops.aten.sum.dim_IntList(view_244, [0], True); view_244 = None\n view_245 = torch.ops.aten.view.default(sum_77, [768]); sum_77 = None\n permute_224 = torch.ops.aten.permute.default(permute_223, [1, 0]); permute_223 = None\n view_246 = 
torch.ops.aten.view.default(mm_51, [1, 64, 3072]); mm_51 = None\n view_69 = torch.ops.aten.view.default(addmm_22, [1, 64, 3072]); addmm_22 = None\n mul_44 = torch.ops.aten.mul.Tensor(view_69, 0.5)\n mul_243 = torch.ops.aten.mul.Tensor(view_246, mul_44); mul_44 = None\n pow_6 = torch.ops.aten.pow.Tensor_Scalar(view_69, 3.0)\n mul_45 = torch.ops.aten.mul.Tensor(pow_6, 0.044715); pow_6 = None\n add_46 = torch.ops.aten.add.Tensor(view_69, mul_45); mul_45 = None\n mul_46 = torch.ops.aten.mul.Tensor(add_46, 0.7978845608028654); add_46 = None\n tanh_5 = torch.ops.aten.tanh.default(mul_46); mul_46 = None\n add_47 = torch.ops.aten.add.Tensor(tanh_5, 1.0)\n mul_244 = torch.ops.aten.mul.Tensor(view_246, add_47); view_246 = add_47 = None\n mul_245 = torch.ops.aten.mul.Tensor(tanh_5, tanh_5); tanh_5 = None\n sub_70 = torch.ops.aten.sub.Tensor(1, mul_245); mul_245 = None\n mul_246 = torch.ops.aten.mul.Tensor(mul_243, sub_70); mul_243 = sub_70 = None\n mul_247 = torch.ops.aten.mul.Tensor(mul_246, 0.7978845608028654); mul_246 = None\n mul_248 = torch.ops.aten.mul.Tensor(mul_247, 0.044715)\n pow_19 = torch.ops.aten.pow.Tensor_Scalar(view_69, 2.0); view_69 = None\n mul_249 = torch.ops.aten.mul.Scalar(pow_19, 3.0); pow_19 = None\n mul_250 = torch.ops.aten.mul.Tensor(mul_248, mul_249); mul_248 = mul_249 = None\n add_123 = torch.ops.aten.add.Tensor(mul_247, mul_250); mul_247 = mul_250 = None\n mul_251 = torch.ops.aten.mul.Tensor(mul_244, 0.5); mul_244 = None\n add_124 = torch.ops.aten.add.Tensor(add_123, mul_251); add_123 = mul_251 = None\n view_247 = torch.ops.aten.view.default(add_124, [64, 3072]); add_124 = None\n mm_53 = torch.ops.aten.mm.default(view_247, permute_225); permute_225 = None\n permute_226 = torch.ops.aten.permute.default(view_247, [1, 0])\n mm_54 = torch.ops.aten.mm.default(permute_226, view_68); permute_226 = view_68 = None\n permute_227 = torch.ops.aten.permute.default(mm_54, [1, 0]); mm_54 = None\n sum_78 = torch.ops.aten.sum.dim_IntList(view_247, [0], True); view_247 = None\n view_248 = torch.ops.aten.view.default(sum_78, [3072]); sum_78 = None\n permute_228 = torch.ops.aten.permute.default(permute_227, [1, 0]); permute_227 = None\n view_249 = torch.ops.aten.view.default(mm_53, [1, 64, 768]); mm_53 = None\n mul_253 = torch.ops.aten.mul.Tensor(view_249, primals_70); primals_70 = None\n mul_254 = torch.ops.aten.mul.Tensor(mul_253, 768)\n sum_79 = torch.ops.aten.sum.dim_IntList(mul_253, [2], True)\n mul_255 = torch.ops.aten.mul.Tensor(mul_253, mul_42); mul_253 = None\n sum_80 = torch.ops.aten.sum.dim_IntList(mul_255, [2], True); mul_255 = None\n mul_256 = torch.ops.aten.mul.Tensor(mul_42, sum_80); sum_80 = None\n sub_72 = torch.ops.aten.sub.Tensor(mul_254, sum_79); mul_254 = sum_79 = None\n sub_73 = torch.ops.aten.sub.Tensor(sub_72, mul_256); sub_72 = mul_256 = None\n mul_257 = torch.ops.aten.mul.Tensor(div_13, sub_73); div_13 = sub_73 = None\n mul_258 = torch.ops.aten.mul.Tensor(view_249, mul_42); mul_42 = None\n sum_81 = torch.ops.aten.sum.dim_IntList(mul_258, [0, 1]); mul_258 = None\n sum_82 = torch.ops.aten.sum.dim_IntList(view_249, [0, 1]); view_249 = None\n add_125 = torch.ops.aten.add.Tensor(add_122, mul_257); add_122 = mul_257 = None\n view_250 = torch.ops.aten.view.default(add_125, [64, 768])\n mm_55 = torch.ops.aten.mm.default(view_250, permute_229); permute_229 = None\n permute_230 = torch.ops.aten.permute.default(view_250, [1, 0])\n permute_44 = torch.ops.aten.permute.default(getitem_60, [0, 2, 1, 3])\n view_65 = torch.ops.aten.view.default(permute_44, [1, 64, 768]); permute_44 = 
None\n view_66 = torch.ops.aten.view.default(view_65, [64, 768]); view_65 = None\n mm_56 = torch.ops.aten.mm.default(permute_230, view_66); permute_230 = view_66 = None\n permute_231 = torch.ops.aten.permute.default(mm_56, [1, 0]); mm_56 = None\n sum_83 = torch.ops.aten.sum.dim_IntList(view_250, [0], True); view_250 = None\n view_251 = torch.ops.aten.view.default(sum_83, [768]); sum_83 = None\n permute_232 = torch.ops.aten.permute.default(permute_231, [1, 0]); permute_231 = None\n view_252 = torch.ops.aten.view.default(mm_55, [1, 64, 768]); mm_55 = None\n view_253 = torch.ops.aten.view.default(view_252, [1, 64, 12, 64]); view_252 = None\n permute_233 = torch.ops.aten.permute.default(view_253, [0, 2, 1, 3]); view_253 = None\n _scaled_dot_product_efficient_attention_backward_6 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_233, permute_42, permute_41, permute_43, None, getitem_60, getitem_61, getitem_62, getitem_63, 0.0, [True, True, True, False], True); permute_233 = permute_42 = permute_41 = permute_43 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = None\n getitem_158 = _scaled_dot_product_efficient_attention_backward_6[0]\n getitem_159 = _scaled_dot_product_efficient_attention_backward_6[1]\n getitem_160 = _scaled_dot_product_efficient_attention_backward_6[2]; _scaled_dot_product_efficient_attention_backward_6 = None\n permute_234 = torch.ops.aten.permute.default(getitem_160, [0, 2, 1, 3]); getitem_160 = None\n view_254 = torch.ops.aten.view.default(permute_234, [1, 64, 768]); permute_234 = None\n permute_235 = torch.ops.aten.permute.default(getitem_158, [0, 2, 1, 3]); getitem_158 = None\n view_255 = torch.ops.aten.view.default(permute_235, [1, 64, 768]); permute_235 = None\n permute_236 = torch.ops.aten.permute.default(getitem_159, [0, 2, 1, 3]); getitem_159 = None\n view_256 = torch.ops.aten.view.default(permute_236, [1, 64, 768]); permute_236 = None\n cat_6 = torch.ops.aten.cat.default([view_255, view_256, view_254], 2); view_255 = view_256 = view_254 = None\n view_257 = torch.ops.aten.view.default(cat_6, [64, 2304]); cat_6 = None\n mm_57 = torch.ops.aten.mm.default(view_257, permute_237); permute_237 = None\n permute_238 = torch.ops.aten.permute.default(view_257, [1, 0])\n mm_58 = torch.ops.aten.mm.default(permute_238, view_60); permute_238 = view_60 = None\n permute_239 = torch.ops.aten.permute.default(mm_58, [1, 0]); mm_58 = None\n sum_84 = torch.ops.aten.sum.dim_IntList(view_257, [0], True); view_257 = None\n view_258 = torch.ops.aten.view.default(sum_84, [2304]); sum_84 = None\n permute_240 = torch.ops.aten.permute.default(permute_239, [1, 0]); permute_239 = None\n view_259 = torch.ops.aten.view.default(mm_57, [1, 64, 768]); mm_57 = None\n mul_260 = torch.ops.aten.mul.Tensor(view_259, primals_64); primals_64 = None\n mul_261 = torch.ops.aten.mul.Tensor(mul_260, 768)\n sum_85 = torch.ops.aten.sum.dim_IntList(mul_260, [2], True)\n mul_262 = torch.ops.aten.mul.Tensor(mul_260, mul_40); mul_260 = None\n sum_86 = torch.ops.aten.sum.dim_IntList(mul_262, [2], True); mul_262 = None\n mul_263 = torch.ops.aten.mul.Tensor(mul_40, sum_86); sum_86 = None\n sub_75 = torch.ops.aten.sub.Tensor(mul_261, sum_85); mul_261 = sum_85 = None\n sub_76 = torch.ops.aten.sub.Tensor(sub_75, mul_263); sub_75 = mul_263 = None\n mul_264 = torch.ops.aten.mul.Tensor(div_14, sub_76); div_14 = sub_76 = None\n mul_265 = torch.ops.aten.mul.Tensor(view_259, mul_40); mul_40 = None\n sum_87 = torch.ops.aten.sum.dim_IntList(mul_265, [0, 1]); mul_265 = None\n sum_88 = 
torch.ops.aten.sum.dim_IntList(view_259, [0, 1]); view_259 = None\n add_126 = torch.ops.aten.add.Tensor(add_125, mul_264); add_125 = mul_264 = None\n view_260 = torch.ops.aten.view.default(add_126, [64, 768])\n mm_59 = torch.ops.aten.mm.default(view_260, permute_241); permute_241 = None\n permute_242 = torch.ops.aten.permute.default(view_260, [1, 0])\n mm_60 = torch.ops.aten.mm.default(permute_242, view_58); permute_242 = view_58 = None\n permute_243 = torch.ops.aten.permute.default(mm_60, [1, 0]); mm_60 = None\n sum_89 = torch.ops.aten.sum.dim_IntList(view_260, [0], True); view_260 = None\n view_261 = torch.ops.aten.view.default(sum_89, [768]); sum_89 = None\n permute_244 = torch.ops.aten.permute.default(permute_243, [1, 0]); permute_243 = None\n view_262 = torch.ops.aten.view.default(mm_59, [1, 64, 3072]); mm_59 = None\n view_57 = torch.ops.aten.view.default(addmm_18, [1, 64, 3072]); addmm_18 = None\n mul_36 = torch.ops.aten.mul.Tensor(view_57, 0.5)\n mul_266 = torch.ops.aten.mul.Tensor(view_262, mul_36); mul_36 = None\n pow_5 = torch.ops.aten.pow.Tensor_Scalar(view_57, 3.0)\n mul_37 = torch.ops.aten.mul.Tensor(pow_5, 0.044715); pow_5 = None\n add_38 = torch.ops.aten.add.Tensor(view_57, mul_37); mul_37 = None\n mul_38 = torch.ops.aten.mul.Tensor(add_38, 0.7978845608028654); add_38 = None\n tanh_4 = torch.ops.aten.tanh.default(mul_38); mul_38 = None\n add_39 = torch.ops.aten.add.Tensor(tanh_4, 1.0)\n mul_267 = torch.ops.aten.mul.Tensor(view_262, add_39); view_262 = add_39 = None\n mul_268 = torch.ops.aten.mul.Tensor(tanh_4, tanh_4); tanh_4 = None\n sub_77 = torch.ops.aten.sub.Tensor(1, mul_268); mul_268 = None\n mul_269 = torch.ops.aten.mul.Tensor(mul_266, sub_77); mul_266 = sub_77 = None\n mul_270 = torch.ops.aten.mul.Tensor(mul_269, 0.7978845608028654); mul_269 = None\n mul_271 = torch.ops.aten.mul.Tensor(mul_270, 0.044715)\n pow_20 = torch.ops.aten.pow.Tensor_Scalar(view_57, 2.0); view_57 = None\n mul_272 = torch.ops.aten.mul.Scalar(pow_20, 3.0); pow_20 = None\n mul_273 = torch.ops.aten.mul.Tensor(mul_271, mul_272); mul_271 = mul_272 = None\n add_127 = torch.ops.aten.add.Tensor(mul_270, mul_273); mul_270 = mul_273 = None\n mul_274 = torch.ops.aten.mul.Tensor(mul_267, 0.5); mul_267 = None\n add_128 = torch.ops.aten.add.Tensor(add_127, mul_274); add_127 = mul_274 = None\n view_263 = torch.ops.aten.view.default(add_128, [64, 3072]); add_128 = None\n mm_61 = torch.ops.aten.mm.default(view_263, permute_245); permute_245 = None\n permute_246 = torch.ops.aten.permute.default(view_263, [1, 0])\n mm_62 = torch.ops.aten.mm.default(permute_246, view_56); permute_246 = view_56 = None\n permute_247 = torch.ops.aten.permute.default(mm_62, [1, 0]); mm_62 = None\n sum_90 = torch.ops.aten.sum.dim_IntList(view_263, [0], True); view_263 = None\n view_264 = torch.ops.aten.view.default(sum_90, [3072]); sum_90 = None\n permute_248 = torch.ops.aten.permute.default(permute_247, [1, 0]); permute_247 = None\n view_265 = torch.ops.aten.view.default(mm_61, [1, 64, 768]); mm_61 = None\n mul_276 = torch.ops.aten.mul.Tensor(view_265, primals_58); primals_58 = None\n mul_277 = torch.ops.aten.mul.Tensor(mul_276, 768)\n sum_91 = torch.ops.aten.sum.dim_IntList(mul_276, [2], True)\n mul_278 = torch.ops.aten.mul.Tensor(mul_276, mul_34); mul_276 = None\n sum_92 = torch.ops.aten.sum.dim_IntList(mul_278, [2], True); mul_278 = None\n mul_279 = torch.ops.aten.mul.Tensor(mul_34, sum_92); sum_92 = None\n sub_79 = torch.ops.aten.sub.Tensor(mul_277, sum_91); mul_277 = sum_91 = None\n sub_80 = torch.ops.aten.sub.Tensor(sub_79, 
mul_279); sub_79 = mul_279 = None\n mul_280 = torch.ops.aten.mul.Tensor(div_15, sub_80); div_15 = sub_80 = None\n mul_281 = torch.ops.aten.mul.Tensor(view_265, mul_34); mul_34 = None\n sum_93 = torch.ops.aten.sum.dim_IntList(mul_281, [0, 1]); mul_281 = None\n sum_94 = torch.ops.aten.sum.dim_IntList(view_265, [0, 1]); view_265 = None\n add_129 = torch.ops.aten.add.Tensor(add_126, mul_280); add_126 = mul_280 = None\n view_266 = torch.ops.aten.view.default(add_129, [64, 768])\n mm_63 = torch.ops.aten.mm.default(view_266, permute_249); permute_249 = None\n permute_250 = torch.ops.aten.permute.default(view_266, [1, 0])\n permute_36 = torch.ops.aten.permute.default(getitem_49, [0, 2, 1, 3])\n view_53 = torch.ops.aten.view.default(permute_36, [1, 64, 768]); permute_36 = None\n view_54 = torch.ops.aten.view.default(view_53, [64, 768]); view_53 = None\n mm_64 = torch.ops.aten.mm.default(permute_250, view_54); permute_250 = view_54 = None\n permute_251 = torch.ops.aten.permute.default(mm_64, [1, 0]); mm_64 = None\n sum_95 = torch.ops.aten.sum.dim_IntList(view_266, [0], True); view_266 = None\n view_267 = torch.ops.aten.view.default(sum_95, [768]); sum_95 = None\n permute_252 = torch.ops.aten.permute.default(permute_251, [1, 0]); permute_251 = None\n view_268 = torch.ops.aten.view.default(mm_63, [1, 64, 768]); mm_63 = None\n view_269 = torch.ops.aten.view.default(view_268, [1, 64, 12, 64]); view_268 = None\n permute_253 = torch.ops.aten.permute.default(view_269, [0, 2, 1, 3]); view_269 = None\n _scaled_dot_product_efficient_attention_backward_7 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_253, permute_34, permute_33, permute_35, None, getitem_49, getitem_50, getitem_51, getitem_52, 0.0, [True, True, True, False], True); permute_253 = permute_34 = permute_33 = permute_35 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = None\n getitem_162 = _scaled_dot_product_efficient_attention_backward_7[0]\n getitem_163 = _scaled_dot_product_efficient_attention_backward_7[1]\n getitem_164 = _scaled_dot_product_efficient_attention_backward_7[2]; _scaled_dot_product_efficient_attention_backward_7 = None\n permute_254 = torch.ops.aten.permute.default(getitem_164, [0, 2, 1, 3]); getitem_164 = None\n view_270 = torch.ops.aten.view.default(permute_254, [1, 64, 768]); permute_254 = None\n permute_255 = torch.ops.aten.permute.default(getitem_162, [0, 2, 1, 3]); getitem_162 = None\n view_271 = torch.ops.aten.view.default(permute_255, [1, 64, 768]); permute_255 = None\n permute_256 = torch.ops.aten.permute.default(getitem_163, [0, 2, 1, 3]); getitem_163 = None\n view_272 = torch.ops.aten.view.default(permute_256, [1, 64, 768]); permute_256 = None\n cat_7 = torch.ops.aten.cat.default([view_271, view_272, view_270], 2); view_271 = view_272 = view_270 = None\n view_273 = torch.ops.aten.view.default(cat_7, [64, 2304]); cat_7 = None\n mm_65 = torch.ops.aten.mm.default(view_273, permute_257); permute_257 = None\n permute_258 = torch.ops.aten.permute.default(view_273, [1, 0])\n mm_66 = torch.ops.aten.mm.default(permute_258, view_48); permute_258 = view_48 = None\n permute_259 = torch.ops.aten.permute.default(mm_66, [1, 0]); mm_66 = None\n sum_96 = torch.ops.aten.sum.dim_IntList(view_273, [0], True); view_273 = None\n view_274 = torch.ops.aten.view.default(sum_96, [2304]); sum_96 = None\n permute_260 = torch.ops.aten.permute.default(permute_259, [1, 0]); permute_259 = None\n view_275 = torch.ops.aten.view.default(mm_65, [1, 64, 768]); mm_65 = None\n mul_283 = 
torch.ops.aten.mul.Tensor(view_275, primals_52); primals_52 = None\n mul_284 = torch.ops.aten.mul.Tensor(mul_283, 768)\n sum_97 = torch.ops.aten.sum.dim_IntList(mul_283, [2], True)\n mul_285 = torch.ops.aten.mul.Tensor(mul_283, mul_32); mul_283 = None\n sum_98 = torch.ops.aten.sum.dim_IntList(mul_285, [2], True); mul_285 = None\n mul_286 = torch.ops.aten.mul.Tensor(mul_32, sum_98); sum_98 = None\n sub_82 = torch.ops.aten.sub.Tensor(mul_284, sum_97); mul_284 = sum_97 = None\n sub_83 = torch.ops.aten.sub.Tensor(sub_82, mul_286); sub_82 = mul_286 = None\n mul_287 = torch.ops.aten.mul.Tensor(div_16, sub_83); div_16 = sub_83 = None\n mul_288 = torch.ops.aten.mul.Tensor(view_275, mul_32); mul_32 = None\n sum_99 = torch.ops.aten.sum.dim_IntList(mul_288, [0, 1]); mul_288 = None\n sum_100 = torch.ops.aten.sum.dim_IntList(view_275, [0, 1]); view_275 = None\n add_130 = torch.ops.aten.add.Tensor(add_129, mul_287); add_129 = mul_287 = None\n view_276 = torch.ops.aten.view.default(add_130, [64, 768])\n mm_67 = torch.ops.aten.mm.default(view_276, permute_261); permute_261 = None\n permute_262 = torch.ops.aten.permute.default(view_276, [1, 0])\n mm_68 = torch.ops.aten.mm.default(permute_262, view_46); permute_262 = view_46 = None\n permute_263 = torch.ops.aten.permute.default(mm_68, [1, 0]); mm_68 = None\n sum_101 = torch.ops.aten.sum.dim_IntList(view_276, [0], True); view_276 = None\n view_277 = torch.ops.aten.view.default(sum_101, [768]); sum_101 = None\n permute_264 = torch.ops.aten.permute.default(permute_263, [1, 0]); permute_263 = None\n view_278 = torch.ops.aten.view.default(mm_67, [1, 64, 3072]); mm_67 = None\n view_45 = torch.ops.aten.view.default(addmm_14, [1, 64, 3072]); addmm_14 = None\n mul_28 = torch.ops.aten.mul.Tensor(view_45, 0.5)\n mul_289 = torch.ops.aten.mul.Tensor(view_278, mul_28); mul_28 = None\n pow_4 = torch.ops.aten.pow.Tensor_Scalar(view_45, 3.0)\n mul_29 = torch.ops.aten.mul.Tensor(pow_4, 0.044715); pow_4 = None\n add_30 = torch.ops.aten.add.Tensor(view_45, mul_29); mul_29 = None\n mul_30 = torch.ops.aten.mul.Tensor(add_30, 0.7978845608028654); add_30 = None\n tanh_3 = torch.ops.aten.tanh.default(mul_30); mul_30 = None\n add_31 = torch.ops.aten.add.Tensor(tanh_3, 1.0)\n mul_290 = torch.ops.aten.mul.Tensor(view_278, add_31); view_278 = add_31 = None\n mul_291 = torch.ops.aten.mul.Tensor(tanh_3, tanh_3); tanh_3 = None\n sub_84 = torch.ops.aten.sub.Tensor(1, mul_291); mul_291 = None\n mul_292 = torch.ops.aten.mul.Tensor(mul_289, sub_84); mul_289 = sub_84 = None\n mul_293 = torch.ops.aten.mul.Tensor(mul_292, 0.7978845608028654); mul_292 = None\n mul_294 = torch.ops.aten.mul.Tensor(mul_293, 0.044715)\n pow_21 = torch.ops.aten.pow.Tensor_Scalar(view_45, 2.0); view_45 = None\n mul_295 = torch.ops.aten.mul.Scalar(pow_21, 3.0); pow_21 = None\n mul_296 = torch.ops.aten.mul.Tensor(mul_294, mul_295); mul_294 = mul_295 = None\n add_131 = torch.ops.aten.add.Tensor(mul_293, mul_296); mul_293 = mul_296 = None\n mul_297 = torch.ops.aten.mul.Tensor(mul_290, 0.5); mul_290 = None\n add_132 = torch.ops.aten.add.Tensor(add_131, mul_297); add_131 = mul_297 = None\n view_279 = torch.ops.aten.view.default(add_132, [64, 3072]); add_132 = None\n mm_69 = torch.ops.aten.mm.default(view_279, permute_265); permute_265 = None\n permute_266 = torch.ops.aten.permute.default(view_279, [1, 0])\n mm_70 = torch.ops.aten.mm.default(permute_266, view_44); permute_266 = view_44 = None\n permute_267 = torch.ops.aten.permute.default(mm_70, [1, 0]); mm_70 = None\n sum_102 = torch.ops.aten.sum.dim_IntList(view_279, [0], 
True); view_279 = None\n view_280 = torch.ops.aten.view.default(sum_102, [3072]); sum_102 = None\n permute_268 = torch.ops.aten.permute.default(permute_267, [1, 0]); permute_267 = None\n view_281 = torch.ops.aten.view.default(mm_69, [1, 64, 768]); mm_69 = None\n mul_299 = torch.ops.aten.mul.Tensor(view_281, primals_46); primals_46 = None\n mul_300 = torch.ops.aten.mul.Tensor(mul_299, 768)\n sum_103 = torch.ops.aten.sum.dim_IntList(mul_299, [2], True)\n mul_301 = torch.ops.aten.mul.Tensor(mul_299, mul_26); mul_299 = None\n sum_104 = torch.ops.aten.sum.dim_IntList(mul_301, [2], True); mul_301 = None\n mul_302 = torch.ops.aten.mul.Tensor(mul_26, sum_104); sum_104 = None\n sub_86 = torch.ops.aten.sub.Tensor(mul_300, sum_103); mul_300 = sum_103 = None\n sub_87 = torch.ops.aten.sub.Tensor(sub_86, mul_302); sub_86 = mul_302 = None\n mul_303 = torch.ops.aten.mul.Tensor(div_17, sub_87); div_17 = sub_87 = None\n mul_304 = torch.ops.aten.mul.Tensor(view_281, mul_26); mul_26 = None\n sum_105 = torch.ops.aten.sum.dim_IntList(mul_304, [0, 1]); mul_304 = None\n sum_106 = torch.ops.aten.sum.dim_IntList(view_281, [0, 1]); view_281 = None\n add_133 = torch.ops.aten.add.Tensor(add_130, mul_303); add_130 = mul_303 = None\n view_282 = torch.ops.aten.view.default(add_133, [64, 768])\n mm_71 = torch.ops.aten.mm.default(view_282, permute_269); permute_269 = None\n permute_270 = torch.ops.aten.permute.default(view_282, [1, 0])\n permute_28 = torch.ops.aten.permute.default(getitem_38, [0, 2, 1, 3])\n view_41 = torch.ops.aten.view.default(permute_28, [1, 64, 768]); permute_28 = None\n view_42 = torch.ops.aten.view.default(view_41, [64, 768]); view_41 = None\n mm_72 = torch.ops.aten.mm.default(permute_270, view_42); permute_270 = view_42 = None\n permute_271 = torch.ops.aten.permute.default(mm_72, [1, 0]); mm_72 = None\n sum_107 = torch.ops.aten.sum.dim_IntList(view_282, [0], True); view_282 = None\n view_283 = torch.ops.aten.view.default(sum_107, [768]); sum_107 = None\n permute_272 = torch.ops.aten.permute.default(permute_271, [1, 0]); permute_271 = None\n view_284 = torch.ops.aten.view.default(mm_71, [1, 64, 768]); mm_71 = None\n view_285 = torch.ops.aten.view.default(view_284, [1, 64, 12, 64]); view_284 = None\n permute_273 = torch.ops.aten.permute.default(view_285, [0, 2, 1, 3]); view_285 = None\n _scaled_dot_product_efficient_attention_backward_8 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_273, permute_26, permute_25, permute_27, None, getitem_38, getitem_39, getitem_40, getitem_41, 0.0, [True, True, True, False], True); permute_273 = permute_26 = permute_25 = permute_27 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = None\n getitem_166 = _scaled_dot_product_efficient_attention_backward_8[0]\n getitem_167 = _scaled_dot_product_efficient_attention_backward_8[1]\n getitem_168 = _scaled_dot_product_efficient_attention_backward_8[2]; _scaled_dot_product_efficient_attention_backward_8 = None\n permute_274 = torch.ops.aten.permute.default(getitem_168, [0, 2, 1, 3]); getitem_168 = None\n view_286 = torch.ops.aten.view.default(permute_274, [1, 64, 768]); permute_274 = None\n permute_275 = torch.ops.aten.permute.default(getitem_166, [0, 2, 1, 3]); getitem_166 = None\n view_287 = torch.ops.aten.view.default(permute_275, [1, 64, 768]); permute_275 = None\n permute_276 = torch.ops.aten.permute.default(getitem_167, [0, 2, 1, 3]); getitem_167 = None\n view_288 = torch.ops.aten.view.default(permute_276, [1, 64, 768]); permute_276 = None\n cat_8 = 
torch.ops.aten.cat.default([view_287, view_288, view_286], 2); view_287 = view_288 = view_286 = None\n view_289 = torch.ops.aten.view.default(cat_8, [64, 2304]); cat_8 = None\n mm_73 = torch.ops.aten.mm.default(view_289, permute_277); permute_277 = None\n permute_278 = torch.ops.aten.permute.default(view_289, [1, 0])\n mm_74 = torch.ops.aten.mm.default(permute_278, view_36); permute_278 = view_36 = None\n permute_279 = torch.ops.aten.permute.default(mm_74, [1, 0]); mm_74 = None\n sum_108 = torch.ops.aten.sum.dim_IntList(view_289, [0], True); view_289 = None\n view_290 = torch.ops.aten.view.default(sum_108, [2304]); sum_108 = None\n permute_280 = torch.ops.aten.permute.default(permute_279, [1, 0]); permute_279 = None\n view_291 = torch.ops.aten.view.default(mm_73, [1, 64, 768]); mm_73 = None\n mul_306 = torch.ops.aten.mul.Tensor(view_291, primals_40); primals_40 = None\n mul_307 = torch.ops.aten.mul.Tensor(mul_306, 768)\n sum_109 = torch.ops.aten.sum.dim_IntList(mul_306, [2], True)\n mul_308 = torch.ops.aten.mul.Tensor(mul_306, mul_24); mul_306 = None\n sum_110 = torch.ops.aten.sum.dim_IntList(mul_308, [2], True); mul_308 = None\n mul_309 = torch.ops.aten.mul.Tensor(mul_24, sum_110); sum_110 = None\n sub_89 = torch.ops.aten.sub.Tensor(mul_307, sum_109); mul_307 = sum_109 = None\n sub_90 = torch.ops.aten.sub.Tensor(sub_89, mul_309); sub_89 = mul_309 = None\n mul_310 = torch.ops.aten.mul.Tensor(div_18, sub_90); div_18 = sub_90 = None\n mul_311 = torch.ops.aten.mul.Tensor(view_291, mul_24); mul_24 = None\n sum_111 = torch.ops.aten.sum.dim_IntList(mul_311, [0, 1]); mul_311 = None\n sum_112 = torch.ops.aten.sum.dim_IntList(view_291, [0, 1]); view_291 = None\n add_134 = torch.ops.aten.add.Tensor(add_133, mul_310); add_133 = mul_310 = None\n view_292 = torch.ops.aten.view.default(add_134, [64, 768])\n mm_75 = torch.ops.aten.mm.default(view_292, permute_281); permute_281 = None\n permute_282 = torch.ops.aten.permute.default(view_292, [1, 0])\n mm_76 = torch.ops.aten.mm.default(permute_282, view_34); permute_282 = view_34 = None\n permute_283 = torch.ops.aten.permute.default(mm_76, [1, 0]); mm_76 = None\n sum_113 = torch.ops.aten.sum.dim_IntList(view_292, [0], True); view_292 = None\n view_293 = torch.ops.aten.view.default(sum_113, [768]); sum_113 = None\n permute_284 = torch.ops.aten.permute.default(permute_283, [1, 0]); permute_283 = None\n view_294 = torch.ops.aten.view.default(mm_75, [1, 64, 3072]); mm_75 = None\n view_33 = torch.ops.aten.view.default(addmm_10, [1, 64, 3072]); addmm_10 = None\n mul_20 = torch.ops.aten.mul.Tensor(view_33, 0.5)\n mul_312 = torch.ops.aten.mul.Tensor(view_294, mul_20); mul_20 = None\n pow_3 = torch.ops.aten.pow.Tensor_Scalar(view_33, 3.0)\n mul_21 = torch.ops.aten.mul.Tensor(pow_3, 0.044715); pow_3 = None\n add_22 = torch.ops.aten.add.Tensor(view_33, mul_21); mul_21 = None\n mul_22 = torch.ops.aten.mul.Tensor(add_22, 0.7978845608028654); add_22 = None\n tanh_2 = torch.ops.aten.tanh.default(mul_22); mul_22 = None\n add_23 = torch.ops.aten.add.Tensor(tanh_2, 1.0)\n mul_313 = torch.ops.aten.mul.Tensor(view_294, add_23); view_294 = add_23 = None\n mul_314 = torch.ops.aten.mul.Tensor(tanh_2, tanh_2); tanh_2 = None\n sub_91 = torch.ops.aten.sub.Tensor(1, mul_314); mul_314 = None\n mul_315 = torch.ops.aten.mul.Tensor(mul_312, sub_91); mul_312 = sub_91 = None\n mul_316 = torch.ops.aten.mul.Tensor(mul_315, 0.7978845608028654); mul_315 = None\n mul_317 = torch.ops.aten.mul.Tensor(mul_316, 0.044715)\n pow_22 = torch.ops.aten.pow.Tensor_Scalar(view_33, 2.0); view_33 = None\n 
mul_318 = torch.ops.aten.mul.Scalar(pow_22, 3.0); pow_22 = None\n mul_319 = torch.ops.aten.mul.Tensor(mul_317, mul_318); mul_317 = mul_318 = None\n add_135 = torch.ops.aten.add.Tensor(mul_316, mul_319); mul_316 = mul_319 = None\n mul_320 = torch.ops.aten.mul.Tensor(mul_313, 0.5); mul_313 = None\n add_136 = torch.ops.aten.add.Tensor(add_135, mul_320); add_135 = mul_320 = None\n view_295 = torch.ops.aten.view.default(add_136, [64, 3072]); add_136 = None\n mm_77 = torch.ops.aten.mm.default(view_295, permute_285); permute_285 = None\n permute_286 = torch.ops.aten.permute.default(view_295, [1, 0])\n mm_78 = torch.ops.aten.mm.default(permute_286, view_32); permute_286 = view_32 = None\n permute_287 = torch.ops.aten.permute.default(mm_78, [1, 0]); mm_78 = None\n sum_114 = torch.ops.aten.sum.dim_IntList(view_295, [0], True); view_295 = None\n view_296 = torch.ops.aten.view.default(sum_114, [3072]); sum_114 = None\n permute_288 = torch.ops.aten.permute.default(permute_287, [1, 0]); permute_287 = None\n view_297 = torch.ops.aten.view.default(mm_77, [1, 64, 768]); mm_77 = None\n mul_322 = torch.ops.aten.mul.Tensor(view_297, primals_34); primals_34 = None\n mul_323 = torch.ops.aten.mul.Tensor(mul_322, 768)\n sum_115 = torch.ops.aten.sum.dim_IntList(mul_322, [2], True)\n mul_324 = torch.ops.aten.mul.Tensor(mul_322, mul_18); mul_322 = None\n sum_116 = torch.ops.aten.sum.dim_IntList(mul_324, [2], True); mul_324 = None\n mul_325 = torch.ops.aten.mul.Tensor(mul_18, sum_116); sum_116 = None\n sub_93 = torch.ops.aten.sub.Tensor(mul_323, sum_115); mul_323 = sum_115 = None\n sub_94 = torch.ops.aten.sub.Tensor(sub_93, mul_325); sub_93 = mul_325 = None\n mul_326 = torch.ops.aten.mul.Tensor(div_19, sub_94); div_19 = sub_94 = None\n mul_327 = torch.ops.aten.mul.Tensor(view_297, mul_18); mul_18 = None\n sum_117 = torch.ops.aten.sum.dim_IntList(mul_327, [0, 1]); mul_327 = None\n sum_118 = torch.ops.aten.sum.dim_IntList(view_297, [0, 1]); view_297 = None\n add_137 = torch.ops.aten.add.Tensor(add_134, mul_326); add_134 = mul_326 = None\n view_298 = torch.ops.aten.view.default(add_137, [64, 768])\n mm_79 = torch.ops.aten.mm.default(view_298, permute_289); permute_289 = None\n permute_290 = torch.ops.aten.permute.default(view_298, [1, 0])\n permute_20 = torch.ops.aten.permute.default(getitem_27, [0, 2, 1, 3])\n view_29 = torch.ops.aten.view.default(permute_20, [1, 64, 768]); permute_20 = None\n view_30 = torch.ops.aten.view.default(view_29, [64, 768]); view_29 = None\n mm_80 = torch.ops.aten.mm.default(permute_290, view_30); permute_290 = view_30 = None\n permute_291 = torch.ops.aten.permute.default(mm_80, [1, 0]); mm_80 = None\n sum_119 = torch.ops.aten.sum.dim_IntList(view_298, [0], True); view_298 = None\n view_299 = torch.ops.aten.view.default(sum_119, [768]); sum_119 = None\n permute_292 = torch.ops.aten.permute.default(permute_291, [1, 0]); permute_291 = None\n view_300 = torch.ops.aten.view.default(mm_79, [1, 64, 768]); mm_79 = None\n view_301 = torch.ops.aten.view.default(view_300, [1, 64, 12, 64]); view_300 = None\n permute_293 = torch.ops.aten.permute.default(view_301, [0, 2, 1, 3]); view_301 = None\n _scaled_dot_product_efficient_attention_backward_9 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_293, permute_18, permute_17, permute_19, None, getitem_27, getitem_28, getitem_29, getitem_30, 0.0, [True, True, True, False], True); permute_293 = permute_18 = permute_17 = permute_19 = getitem_27 = getitem_28 = getitem_29 = getitem_30 = None\n getitem_170 = 
_scaled_dot_product_efficient_attention_backward_9[0]\n getitem_171 = _scaled_dot_product_efficient_attention_backward_9[1]\n getitem_172 = _scaled_dot_product_efficient_attention_backward_9[2]; _scaled_dot_product_efficient_attention_backward_9 = None\n permute_294 = torch.ops.aten.permute.default(getitem_172, [0, 2, 1, 3]); getitem_172 = None\n view_302 = torch.ops.aten.view.default(permute_294, [1, 64, 768]); permute_294 = None\n permute_295 = torch.ops.aten.permute.default(getitem_170, [0, 2, 1, 3]); getitem_170 = None\n view_303 = torch.ops.aten.view.default(permute_295, [1, 64, 768]); permute_295 = None\n permute_296 = torch.ops.aten.permute.default(getitem_171, [0, 2, 1, 3]); getitem_171 = None\n view_304 = torch.ops.aten.view.default(permute_296, [1, 64, 768]); permute_296 = None\n cat_9 = torch.ops.aten.cat.default([view_303, view_304, view_302], 2); view_303 = view_304 = view_302 = None\n view_305 = torch.ops.aten.view.default(cat_9, [64, 2304]); cat_9 = None\n mm_81 = torch.ops.aten.mm.default(view_305, permute_297); permute_297 = None\n permute_298 = torch.ops.aten.permute.default(view_305, [1, 0])\n mm_82 = torch.ops.aten.mm.default(permute_298, view_24); permute_298 = view_24 = None\n permute_299 = torch.ops.aten.permute.default(mm_82, [1, 0]); mm_82 = None\n sum_120 = torch.ops.aten.sum.dim_IntList(view_305, [0], True); view_305 = None\n view_306 = torch.ops.aten.view.default(sum_120, [2304]); sum_120 = None\n permute_300 = torch.ops.aten.permute.default(permute_299, [1, 0]); permute_299 = None\n view_307 = torch.ops.aten.view.default(mm_81, [1, 64, 768]); mm_81 = None\n mul_329 = torch.ops.aten.mul.Tensor(view_307, primals_28); primals_28 = None\n mul_330 = torch.ops.aten.mul.Tensor(mul_329, 768)\n sum_121 = torch.ops.aten.sum.dim_IntList(mul_329, [2], True)\n mul_331 = torch.ops.aten.mul.Tensor(mul_329, mul_16); mul_329 = None\n sum_122 = torch.ops.aten.sum.dim_IntList(mul_331, [2], True); mul_331 = None\n mul_332 = torch.ops.aten.mul.Tensor(mul_16, sum_122); sum_122 = None\n sub_96 = torch.ops.aten.sub.Tensor(mul_330, sum_121); mul_330 = sum_121 = None\n sub_97 = torch.ops.aten.sub.Tensor(sub_96, mul_332); sub_96 = mul_332 = None\n mul_333 = torch.ops.aten.mul.Tensor(div_20, sub_97); div_20 = sub_97 = None\n mul_334 = torch.ops.aten.mul.Tensor(view_307, mul_16); mul_16 = None\n sum_123 = torch.ops.aten.sum.dim_IntList(mul_334, [0, 1]); mul_334 = None\n sum_124 = torch.ops.aten.sum.dim_IntList(view_307, [0, 1]); view_307 = None\n add_138 = torch.ops.aten.add.Tensor(add_137, mul_333); add_137 = mul_333 = None\n view_308 = torch.ops.aten.view.default(add_138, [64, 768])\n mm_83 = torch.ops.aten.mm.default(view_308, permute_301); permute_301 = None\n permute_302 = torch.ops.aten.permute.default(view_308, [1, 0])\n mm_84 = torch.ops.aten.mm.default(permute_302, view_22); permute_302 = view_22 = None\n permute_303 = torch.ops.aten.permute.default(mm_84, [1, 0]); mm_84 = None\n sum_125 = torch.ops.aten.sum.dim_IntList(view_308, [0], True); view_308 = None\n view_309 = torch.ops.aten.view.default(sum_125, [768]); sum_125 = None\n permute_304 = torch.ops.aten.permute.default(permute_303, [1, 0]); permute_303 = None\n view_310 = torch.ops.aten.view.default(mm_83, [1, 64, 3072]); mm_83 = None\n view_21 = torch.ops.aten.view.default(addmm_6, [1, 64, 3072]); addmm_6 = None\n mul_12 = torch.ops.aten.mul.Tensor(view_21, 0.5)\n mul_335 = torch.ops.aten.mul.Tensor(view_310, mul_12); mul_12 = None\n pow_2 = torch.ops.aten.pow.Tensor_Scalar(view_21, 3.0)\n mul_13 = 
torch.ops.aten.mul.Tensor(pow_2, 0.044715); pow_2 = None\n add_14 = torch.ops.aten.add.Tensor(view_21, mul_13); mul_13 = None\n mul_14 = torch.ops.aten.mul.Tensor(add_14, 0.7978845608028654); add_14 = None\n tanh_1 = torch.ops.aten.tanh.default(mul_14); mul_14 = None\n add_15 = torch.ops.aten.add.Tensor(tanh_1, 1.0)\n mul_336 = torch.ops.aten.mul.Tensor(view_310, add_15); view_310 = add_15 = None\n mul_337 = torch.ops.aten.mul.Tensor(tanh_1, tanh_1); tanh_1 = None\n sub_98 = torch.ops.aten.sub.Tensor(1, mul_337); mul_337 = None\n mul_338 = torch.ops.aten.mul.Tensor(mul_335, sub_98); mul_335 = sub_98 = None\n mul_339 = torch.ops.aten.mul.Tensor(mul_338, 0.7978845608028654); mul_338 = None\n mul_340 = torch.ops.aten.mul.Tensor(mul_339, 0.044715)\n pow_23 = torch.ops.aten.pow.Tensor_Scalar(view_21, 2.0); view_21 = None\n mul_341 = torch.ops.aten.mul.Scalar(pow_23, 3.0); pow_23 = None\n mul_342 = torch.ops.aten.mul.Tensor(mul_340, mul_341); mul_340 = mul_341 = None\n add_139 = torch.ops.aten.add.Tensor(mul_339, mul_342); mul_339 = mul_342 = None\n mul_343 = torch.ops.aten.mul.Tensor(mul_336, 0.5); mul_336 = None\n add_140 = torch.ops.aten.add.Tensor(add_139, mul_343); add_139 = mul_343 = None\n view_311 = torch.ops.aten.view.default(add_140, [64, 3072]); add_140 = None\n mm_85 = torch.ops.aten.mm.default(view_311, permute_305); permute_305 = None\n permute_306 = torch.ops.aten.permute.default(view_311, [1, 0])\n mm_86 = torch.ops.aten.mm.default(permute_306, view_20); permute_306 = view_20 = None\n permute_307 = torch.ops.aten.permute.default(mm_86, [1, 0]); mm_86 = None\n sum_126 = torch.ops.aten.sum.dim_IntList(view_311, [0], True); view_311 = None\n view_312 = torch.ops.aten.view.default(sum_126, [3072]); sum_126 = None\n permute_308 = torch.ops.aten.permute.default(permute_307, [1, 0]); permute_307 = None\n view_313 = torch.ops.aten.view.default(mm_85, [1, 64, 768]); mm_85 = None\n mul_345 = torch.ops.aten.mul.Tensor(view_313, primals_22); primals_22 = None\n mul_346 = torch.ops.aten.mul.Tensor(mul_345, 768)\n sum_127 = torch.ops.aten.sum.dim_IntList(mul_345, [2], True)\n mul_347 = torch.ops.aten.mul.Tensor(mul_345, mul_10); mul_345 = None\n sum_128 = torch.ops.aten.sum.dim_IntList(mul_347, [2], True); mul_347 = None\n mul_348 = torch.ops.aten.mul.Tensor(mul_10, sum_128); sum_128 = None\n sub_100 = torch.ops.aten.sub.Tensor(mul_346, sum_127); mul_346 = sum_127 = None\n sub_101 = torch.ops.aten.sub.Tensor(sub_100, mul_348); sub_100 = mul_348 = None\n mul_349 = torch.ops.aten.mul.Tensor(div_21, sub_101); div_21 = sub_101 = None\n mul_350 = torch.ops.aten.mul.Tensor(view_313, mul_10); mul_10 = None\n sum_129 = torch.ops.aten.sum.dim_IntList(mul_350, [0, 1]); mul_350 = None\n sum_130 = torch.ops.aten.sum.dim_IntList(view_313, [0, 1]); view_313 = None\n add_141 = torch.ops.aten.add.Tensor(add_138, mul_349); add_138 = mul_349 = None\n view_314 = torch.ops.aten.view.default(add_141, [64, 768])\n mm_87 = torch.ops.aten.mm.default(view_314, permute_309); permute_309 = None\n permute_310 = torch.ops.aten.permute.default(view_314, [1, 0])\n permute_12 = torch.ops.aten.permute.default(getitem_16, [0, 2, 1, 3])\n view_17 = torch.ops.aten.view.default(permute_12, [1, 64, 768]); permute_12 = None\n view_18 = torch.ops.aten.view.default(view_17, [64, 768]); view_17 = None\n mm_88 = torch.ops.aten.mm.default(permute_310, view_18); permute_310 = view_18 = None\n permute_311 = torch.ops.aten.permute.default(mm_88, [1, 0]); mm_88 = None\n sum_131 = torch.ops.aten.sum.dim_IntList(view_314, [0], True); view_314 
= None\n view_315 = torch.ops.aten.view.default(sum_131, [768]); sum_131 = None\n permute_312 = torch.ops.aten.permute.default(permute_311, [1, 0]); permute_311 = None\n view_316 = torch.ops.aten.view.default(mm_87, [1, 64, 768]); mm_87 = None\n view_317 = torch.ops.aten.view.default(view_316, [1, 64, 12, 64]); view_316 = None\n permute_313 = torch.ops.aten.permute.default(view_317, [0, 2, 1, 3]); view_317 = None\n _scaled_dot_product_efficient_attention_backward_10 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_313, permute_10, permute_9, permute_11, None, getitem_16, getitem_17, getitem_18, getitem_19, 0.0, [True, True, True, False], True); permute_313 = permute_10 = permute_9 = permute_11 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = None\n getitem_174 = _scaled_dot_product_efficient_attention_backward_10[0]\n getitem_175 = _scaled_dot_product_efficient_attention_backward_10[1]\n getitem_176 = _scaled_dot_product_efficient_attention_backward_10[2]; _scaled_dot_product_efficient_attention_backward_10 = None\n permute_314 = torch.ops.aten.permute.default(getitem_176, [0, 2, 1, 3]); getitem_176 = None\n view_318 = torch.ops.aten.view.default(permute_314, [1, 64, 768]); permute_314 = None\n permute_315 = torch.ops.aten.permute.default(getitem_174, [0, 2, 1, 3]); getitem_174 = None\n view_319 = torch.ops.aten.view.default(permute_315, [1, 64, 768]); permute_315 = None\n permute_316 = torch.ops.aten.permute.default(getitem_175, [0, 2, 1, 3]); getitem_175 = None\n view_320 = torch.ops.aten.view.default(permute_316, [1, 64, 768]); permute_316 = None\n cat_10 = torch.ops.aten.cat.default([view_319, view_320, view_318], 2); view_319 = view_320 = view_318 = None\n view_321 = torch.ops.aten.view.default(cat_10, [64, 2304]); cat_10 = None\n mm_89 = torch.ops.aten.mm.default(view_321, permute_317); permute_317 = None\n permute_318 = torch.ops.aten.permute.default(view_321, [1, 0])\n mm_90 = torch.ops.aten.mm.default(permute_318, view_12); permute_318 = view_12 = None\n permute_319 = torch.ops.aten.permute.default(mm_90, [1, 0]); mm_90 = None\n sum_132 = torch.ops.aten.sum.dim_IntList(view_321, [0], True); view_321 = None\n view_322 = torch.ops.aten.view.default(sum_132, [2304]); sum_132 = None\n permute_320 = torch.ops.aten.permute.default(permute_319, [1, 0]); permute_319 = None\n view_323 = torch.ops.aten.view.default(mm_89, [1, 64, 768]); mm_89 = None\n mul_352 = torch.ops.aten.mul.Tensor(view_323, primals_16); primals_16 = None\n mul_353 = torch.ops.aten.mul.Tensor(mul_352, 768)\n sum_133 = torch.ops.aten.sum.dim_IntList(mul_352, [2], True)\n mul_354 = torch.ops.aten.mul.Tensor(mul_352, mul_8); mul_352 = None\n sum_134 = torch.ops.aten.sum.dim_IntList(mul_354, [2], True); mul_354 = None\n mul_355 = torch.ops.aten.mul.Tensor(mul_8, sum_134); sum_134 = None\n sub_103 = torch.ops.aten.sub.Tensor(mul_353, sum_133); mul_353 = sum_133 = None\n sub_104 = torch.ops.aten.sub.Tensor(sub_103, mul_355); sub_103 = mul_355 = None\n mul_356 = torch.ops.aten.mul.Tensor(div_22, sub_104); div_22 = sub_104 = None\n mul_357 = torch.ops.aten.mul.Tensor(view_323, mul_8); mul_8 = None\n sum_135 = torch.ops.aten.sum.dim_IntList(mul_357, [0, 1]); mul_357 = None\n sum_136 = torch.ops.aten.sum.dim_IntList(view_323, [0, 1]); view_323 = None\n add_142 = torch.ops.aten.add.Tensor(add_141, mul_356); add_141 = mul_356 = None\n view_324 = torch.ops.aten.view.default(add_142, [64, 768])\n mm_91 = torch.ops.aten.mm.default(view_324, permute_321); permute_321 = None\n permute_322 = 
torch.ops.aten.permute.default(view_324, [1, 0])\n mm_92 = torch.ops.aten.mm.default(permute_322, view_10); permute_322 = view_10 = None\n permute_323 = torch.ops.aten.permute.default(mm_92, [1, 0]); mm_92 = None\n sum_137 = torch.ops.aten.sum.dim_IntList(view_324, [0], True); view_324 = None\n view_325 = torch.ops.aten.view.default(sum_137, [768]); sum_137 = None\n permute_324 = torch.ops.aten.permute.default(permute_323, [1, 0]); permute_323 = None\n view_326 = torch.ops.aten.view.default(mm_91, [1, 64, 3072]); mm_91 = None\n view_9 = torch.ops.aten.view.default(addmm_2, [1, 64, 3072]); addmm_2 = None\n mul_4 = torch.ops.aten.mul.Tensor(view_9, 0.5)\n mul_358 = torch.ops.aten.mul.Tensor(view_326, mul_4); mul_4 = None\n pow_1 = torch.ops.aten.pow.Tensor_Scalar(view_9, 3.0)\n mul_5 = torch.ops.aten.mul.Tensor(pow_1, 0.044715); pow_1 = None\n add_6 = torch.ops.aten.add.Tensor(view_9, mul_5); mul_5 = None\n mul_6 = torch.ops.aten.mul.Tensor(add_6, 0.7978845608028654); add_6 = None\n tanh = torch.ops.aten.tanh.default(mul_6); mul_6 = None\n add_7 = torch.ops.aten.add.Tensor(tanh, 1.0)\n mul_359 = torch.ops.aten.mul.Tensor(view_326, add_7); view_326 = add_7 = None\n mul_360 = torch.ops.aten.mul.Tensor(tanh, tanh); tanh = None\n sub_105 = torch.ops.aten.sub.Tensor(1, mul_360); mul_360 = None\n mul_361 = torch.ops.aten.mul.Tensor(mul_358, sub_105); mul_358 = sub_105 = None\n mul_362 = torch.ops.aten.mul.Tensor(mul_361, 0.7978845608028654); mul_361 = None\n mul_363 = torch.ops.aten.mul.Tensor(mul_362, 0.044715)\n pow_24 = torch.ops.aten.pow.Tensor_Scalar(view_9, 2.0); view_9 = None\n mul_364 = torch.ops.aten.mul.Scalar(pow_24, 3.0); pow_24 = None\n mul_365 = torch.ops.aten.mul.Tensor(mul_363, mul_364); mul_363 = mul_364 = None\n add_143 = torch.ops.aten.add.Tensor(mul_362, mul_365); mul_362 = mul_365 = None\n mul_366 = torch.ops.aten.mul.Tensor(mul_359, 0.5); mul_359 = None\n add_144 = torch.ops.aten.add.Tensor(add_143, mul_366); add_143 = mul_366 = None\n view_327 = torch.ops.aten.view.default(add_144, [64, 3072]); add_144 = None\n mm_93 = torch.ops.aten.mm.default(view_327, permute_325); permute_325 = None\n permute_326 = torch.ops.aten.permute.default(view_327, [1, 0])\n mm_94 = torch.ops.aten.mm.default(permute_326, view_8); permute_326 = view_8 = None\n permute_327 = torch.ops.aten.permute.default(mm_94, [1, 0]); mm_94 = None\n sum_138 = torch.ops.aten.sum.dim_IntList(view_327, [0], True); view_327 = None\n view_328 = torch.ops.aten.view.default(sum_138, [3072]); sum_138 = None\n permute_328 = torch.ops.aten.permute.default(permute_327, [1, 0]); permute_327 = None\n view_329 = torch.ops.aten.view.default(mm_93, [1, 64, 768]); mm_93 = None\n mul_368 = torch.ops.aten.mul.Tensor(view_329, primals_10); primals_10 = None\n mul_369 = torch.ops.aten.mul.Tensor(mul_368, 768)\n sum_139 = torch.ops.aten.sum.dim_IntList(mul_368, [2], True)\n mul_370 = torch.ops.aten.mul.Tensor(mul_368, mul_2); mul_368 = None\n sum_140 = torch.ops.aten.sum.dim_IntList(mul_370, [2], True); mul_370 = None\n mul_371 = torch.ops.aten.mul.Tensor(mul_2, sum_140); sum_140 = None\n sub_107 = torch.ops.aten.sub.Tensor(mul_369, sum_139); mul_369 = sum_139 = None\n sub_108 = torch.ops.aten.sub.Tensor(sub_107, mul_371); sub_107 = mul_371 = None\n mul_372 = torch.ops.aten.mul.Tensor(div_23, sub_108); div_23 = sub_108 = None\n mul_373 = torch.ops.aten.mul.Tensor(view_329, mul_2); mul_2 = None\n sum_141 = torch.ops.aten.sum.dim_IntList(mul_373, [0, 1]); mul_373 = None\n sum_142 = torch.ops.aten.sum.dim_IntList(view_329, [0, 1]); 
view_329 = None\n add_145 = torch.ops.aten.add.Tensor(add_142, mul_372); add_142 = mul_372 = None\n view_330 = torch.ops.aten.view.default(add_145, [64, 768])\n mm_95 = torch.ops.aten.mm.default(view_330, permute_329); permute_329 = None\n permute_330 = torch.ops.aten.permute.default(view_330, [1, 0])\n permute_4 = torch.ops.aten.permute.default(getitem_5, [0, 2, 1, 3])\n view_5 = torch.ops.aten.view.default(permute_4, [1, 64, 768]); permute_4 = None\n view_6 = torch.ops.aten.view.default(view_5, [64, 768]); view_5 = None\n mm_96 = torch.ops.aten.mm.default(permute_330, view_6); permute_330 = view_6 = None\n permute_331 = torch.ops.aten.permute.default(mm_96, [1, 0]); mm_96 = None\n sum_143 = torch.ops.aten.sum.dim_IntList(view_330, [0], True); view_330 = None\n view_331 = torch.ops.aten.view.default(sum_143, [768]); sum_143 = None\n permute_332 = torch.ops.aten.permute.default(permute_331, [1, 0]); permute_331 = None\n view_332 = torch.ops.aten.view.default(mm_95, [1, 64, 768]); mm_95 = None\n view_333 = torch.ops.aten.view.default(view_332, [1, 64, 12, 64]); view_332 = None\n permute_333 = torch.ops.aten.permute.default(view_333, [0, 2, 1, 3]); view_333 = None\n _scaled_dot_product_efficient_attention_backward_11 = torch.ops.aten._scaled_dot_product_efficient_attention_backward.default(permute_333, permute_2, permute_1, permute_3, None, getitem_5, getitem_6, getitem_7, getitem_8, 0.0, [True, True, True, False], True); permute_333 = permute_2 = permute_1 = permute_3 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = None\n getitem_178 = _scaled_dot_product_efficient_attention_backward_11[0]\n getitem_179 = _scaled_dot_product_efficient_attention_backward_11[1]\n getitem_180 = _scaled_dot_product_efficient_attention_backward_11[2]; _scaled_dot_product_efficient_attention_backward_11 = None\n permute_334 = torch.ops.aten.permute.default(getitem_180, [0, 2, 1, 3]); getitem_180 = None\n view_334 = torch.ops.aten.view.default(permute_334, [1, 64, 768]); permute_334 = None\n permute_335 = torch.ops.aten.permute.default(getitem_178, [0, 2, 1, 3]); getitem_178 = None\n view_335 = torch.ops.aten.view.default(permute_335, [1, 64, 768]); permute_335 = None\n permute_336 = torch.ops.aten.permute.default(getitem_179, [0, 2, 1, 3]); getitem_179 = None\n view_336 = torch.ops.aten.view.default(permute_336, [1, 64, 768]); permute_336 = None\n cat_11 = torch.ops.aten.cat.default([view_335, view_336, view_334], 2); view_335 = view_336 = view_334 = None\n view_337 = torch.ops.aten.view.default(cat_11, [64, 2304]); cat_11 = None\n mm_97 = torch.ops.aten.mm.default(view_337, permute_337); permute_337 = None\n permute_338 = torch.ops.aten.permute.default(view_337, [1, 0])\n mm_98 = torch.ops.aten.mm.default(permute_338, view); permute_338 = view = None\n permute_339 = torch.ops.aten.permute.default(mm_98, [1, 0]); mm_98 = None\n sum_144 = torch.ops.aten.sum.dim_IntList(view_337, [0], True); view_337 = None\n view_338 = torch.ops.aten.view.default(sum_144, [2304]); sum_144 = None\n permute_340 = torch.ops.aten.permute.default(permute_339, [1, 0]); permute_339 = None\n view_339 = torch.ops.aten.view.default(mm_97, [1, 64, 768]); mm_97 = None\n mul_375 = torch.ops.aten.mul.Tensor(view_339, primals_4); primals_4 = None\n mul_376 = torch.ops.aten.mul.Tensor(mul_375, 768)\n sum_145 = torch.ops.aten.sum.dim_IntList(mul_375, [2], True)\n mul_377 = torch.ops.aten.mul.Tensor(mul_375, mul); mul_375 = None\n sum_146 = torch.ops.aten.sum.dim_IntList(mul_377, [2], True); mul_377 = None\n mul_378 = 
torch.ops.aten.mul.Tensor(mul, sum_146); sum_146 = None\n sub_110 = torch.ops.aten.sub.Tensor(mul_376, sum_145); mul_376 = sum_145 = None\n sub_111 = torch.ops.aten.sub.Tensor(sub_110, mul_378); sub_110 = mul_378 = None\n mul_379 = torch.ops.aten.mul.Tensor(div_24, sub_111); div_24 = sub_111 = None\n mul_380 = torch.ops.aten.mul.Tensor(view_339, mul); mul = None\n sum_147 = torch.ops.aten.sum.dim_IntList(mul_380, [0, 1]); mul_380 = None\n sum_148 = torch.ops.aten.sum.dim_IntList(view_339, [0, 1]); view_339 = None\n add_146 = torch.ops.aten.add.Tensor(add_145, mul_379); add_145 = mul_379 = None\n eq = torch.ops.aten.eq.Scalar(unsqueeze, -1)\n unsqueeze_1 = torch.ops.aten.unsqueeze.default(eq, -1); eq = None\n full_default_4 = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n where = torch.ops.aten.where.self(unsqueeze_1, full_default_4, add_146); unsqueeze_1 = None\n full_default_5 = torch.ops.aten.full.default([1024, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_5, [unsqueeze], where, True); full_default_5 = unsqueeze = where = None\n eq_1 = torch.ops.aten.eq.Scalar(primals_1, -1)\n unsqueeze_2 = torch.ops.aten.unsqueeze.default(eq_1, -1); eq_1 = None\n where_1 = torch.ops.aten.where.self(unsqueeze_2, full_default_4, add_146); unsqueeze_2 = full_default_4 = add_146 = None\n full_default_7 = torch.ops.aten.full.default([50304, 768], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)\n _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_7, [primals_1], where_1, True); full_default_7 = primals_1 = where_1 = None\n add_147 = torch.ops.aten.add.Tensor(permute_100, _unsafe_index_put_1); permute_100 = _unsafe_index_put_1 = None\n return (None, add_147, _unsafe_index_put, sum_147, sum_148, permute_340, view_338, permute_332, view_331, sum_141, sum_142, permute_328, view_328, permute_324, view_325, sum_135, sum_136, permute_320, view_322, permute_312, view_315, sum_129, sum_130, permute_308, view_312, permute_304, view_309, sum_123, sum_124, permute_300, view_306, permute_292, view_299, sum_117, sum_118, permute_288, view_296, permute_284, view_293, sum_111, sum_112, permute_280, view_290, permute_272, view_283, sum_105, sum_106, permute_268, view_280, permute_264, view_277, sum_99, sum_100, permute_260, view_274, permute_252, view_267, sum_93, sum_94, permute_248, view_264, permute_244, view_261, sum_87, sum_88, permute_240, view_258, permute_232, view_251, sum_81, sum_82, permute_228, view_248, permute_224, view_245, sum_75, sum_76, permute_220, view_242, permute_212, view_235, sum_69, sum_70, permute_208, view_232, permute_204, view_229, sum_63, sum_64, permute_200, view_226, permute_192, view_219, sum_57, sum_58, permute_188, view_216, permute_184, view_213, sum_51, sum_52, permute_180, view_210, permute_172, view_203, sum_45, sum_46, permute_168, view_200, permute_164, view_197, sum_39, sum_40, permute_160, view_194, permute_152, view_187, sum_33, sum_34, permute_148, view_184, permute_144, view_181, sum_27, sum_28, permute_140, view_178, permute_132, view_171, sum_21, sum_22, permute_128, view_168, permute_124, view_165, sum_15, sum_16, permute_120, view_162, permute_112, view_155, sum_9, sum_10, permute_108, view_152, permute_104, view_149, sum_3, sum_4)\n \n# To see more 
debug info, please use `graph_module.print_readable()`", "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[0]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, 
is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[5yvj33oys7gdk532knvznzkakzkoiu5vfpvjdr4iy5pj5yukree] example_inputs[26]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1, 64]), stride=(64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=512, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, 
storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[34]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[35]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[47]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[48]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[60]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[61]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[73]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[74]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[78]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[86]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[87]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[99]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[100]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[109]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[112]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[113]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[125]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[126]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[138]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[139]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), 
stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[151]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[152]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[164]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[165]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 
768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[yhigovqtqf2ic6iv63ysozcs3xf23g6ytwxxo3ngkb2rkejwqhy] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=768, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tkhdp6hpzz4wkrsn2gtkob6gjizy3tnh2i335gnvs5dqze2c635] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tymrisxjw2ay2wkyisoywckuw25f6qkcnbqn6ixlh4bzobrvrpw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(147456, 64, 2304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=1536, storage_bytes=589824, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[vwryj3hywd6syuzjzojtwl2i75fr4o3frmbjuk3jj5iywt4uhml] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64, 64]), stride=(49152, 64, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=None, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ypapltbv2xbwkwfrnhvu5iqgrtqrjol43wgxrobbgunlbawqjpw] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 12, 64]), stride=(768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[177]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[tupemxj7hujko5zasqsng7u56x3chsaznovjmn2zs4kn5wlvoce] example_inputs[178]: TensorMetadata(dtype=torch.int64, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] 
example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[rw2lfs4g3wumdacs5ykwrdhclqz5vzsnjfqmmo5ztwqnxn75jxd] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[y3abg6r4y5l3gztzm3humpnaec6pvtrvrqjeyknxzxbyuum5hro] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([64, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=786432, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[4do6jylr3mtx4d264ertpe6pziorxtcf2s4hikwtibwmouxw25h] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 768]), stride=(49152, 768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=196608, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aba44qxan7tyih7ljdxyqka53vkn25cmdzgth5cyl2s7qorx7vi] example_inputs[184]: TensorMetadata(dtype=torch.int64, shape=torch.Size([1]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=8, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[qitbyzr7emyctium3gjpb6gcr75vrxwd24qiyojnre7qqd7zo4f] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 
1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[233]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 
1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[mxqz7gd4dbgzufqocz5p7oivwzmpjmkhr6kbxm4dewvedblut3z] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 64, 1]), stride=(64, 1, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=256, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[u55blbbc73afkevwx6ofprgxxytl7dbrkgoal4z3b6od3qdlugs] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 1, 50304]), stride=(50304, 50304, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_backward]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_inference]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[y3e3yuxtssnww62nt5exdblxjs4qqfe6m45lbogy57sjgkkgd7s] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259]", "[gqceoov337f2fxydib545wytq5n6f565gacorvs7dzatfibcnjq] fx_kwargs[user_visible_outputs]: {'add_147': None, '_unsafe_index_put': None, 'sum_147': None, 'sum_148': None, 'permute_340': None, 'view_338': None, 'permute_332': None, 'view_331': None, 'sum_141': None, 'sum_142': None, 'permute_328': None, 'view_328': None, 'permute_324': None, 'view_325': None, 'sum_135': None, 'sum_136': None, 'permute_320': None, 'view_322': None, 'permute_312': None, 'view_315': None, 'sum_129': None, 'sum_130': None, 'permute_308': None, 'view_312': None, 'permute_304': None, 'view_309': None, 'sum_123': None, 'sum_124': None, 'permute_300': None, 'view_306': None, 'permute_292': None, 'view_299': None, 'sum_117': None, 'sum_118': None, 'permute_288': None, 'view_296': None, 'permute_284': None, 'view_293': None, 'sum_111': None, 'sum_112': None, 'permute_280': None, 'view_290': None, 'permute_272': None, 'view_283': None, 'sum_105': None, 'sum_106': None, 'permute_268': None, 'view_280': None, 'permute_264': None, 'view_277': None, 'sum_99': None, 'sum_100': None, 'permute_260': None, 'view_274': None, 'permute_252': None, 'view_267': None, 'sum_93': None, 'sum_94': None, 'permute_248': None, 'view_264': None, 'permute_244': None, 'view_261': None, 'sum_87': None, 'sum_88': None, 'permute_240': None, 'view_258': None, 'permute_232': None, 'view_251': None, 'sum_81': None, 'sum_82': None, 'permute_228': None, 'view_248': None, 'permute_224': None, 'view_245': None, 'sum_75': None, 'sum_76': None, 'permute_220': None, 'view_242': None, 'permute_212': None, 'view_235': None, 'sum_69': None, 'sum_70': None, 'permute_208': None, 'view_232': None, 'permute_204': None, 'view_229': None, 'sum_63': None, 'sum_64': None, 'permute_200': None, 'view_226': None, 'permute_192': None, 'view_219': None, 'sum_57': None, 'sum_58': None, 'permute_188': None, 'view_216': None, 'permute_184': None, 'view_213': None, 'sum_51': None, 'sum_52': None, 'permute_180': None, 'view_210': None, 'permute_172': None, 'view_203': None, 'sum_45': None, 'sum_46': None, 'permute_168': None, 'view_200': None, 'permute_164': None, 'view_197': None, 'sum_39': None, 'sum_40': None, 'permute_160': None, 'view_194': None, 'permute_152': None, 'view_187': None, 'sum_33': None, 'sum_34': None, 'permute_148': None, 'view_184': None, 'permute_144': None, 'view_181': None, 'sum_27': None, 'sum_28': None, 'permute_140': None, 'view_178': None, 'permute_132': None, 'view_171': None, 'sum_21': None, 'sum_22': None, 'permute_128': None, 'view_168': None, 'permute_124': None, 'view_165': None, 'sum_15': None, 'sum_16': None, 'permute_120': None, 'view_162': None, 'permute_112': None, 'view_155': None, 'sum_9': None, 'sum_10': None, 'permute_108': None, 'view_152': None, 'permute_104': None, 'view_149': None, 'sum_3': None, 'sum_4': None}", "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[0]: 260", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", 
"[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] 
inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]}
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "686420174d58db1ad3214083421e88f9"}
+ {
+ "name": "inductor_compile",
+ "ts": 1722977765129674.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "850f6f968f76596e70f57ae3686f27cf"}
+ {
+ "name": "compile_fx_inner",
+ "ts": 1722977765129750.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:779] {"bwd_compilation_metrics": {"compile_id": "2/0", "inductor_compile_time_s": 3.4067680835723877, "code_gen_time_s": 2.149840831756592, "fail_type": null, "fail_reason": null}, "frame_id": 2, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.129000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "69414400ce6973315a6724bd1dd0d6bf"}
+ {
+ "name": "compile_fx.<locals>.bw_compiler",
+ "ts": 1722977765129957.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.130000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "847a3ca2b47dfe66846b3cdd55fea2ea"}
+ {
+ "name": "cudagraphify",
+ "ts": 1722977765130112.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.130000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "e344de4b4e97bc4240303d1f7dcda48b"}
+ {
+ "name": "cudagraphify",
+ "ts": 1722977765130344.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.132000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "20f2823ad218c39ac107385bbcfa08cf"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765132271.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.223000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "d0d90302be22d31408953ec1b96010a8"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765223053.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.223000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "043f9463cae9b57649917ba2b04a7e1e"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765223638.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.323000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "aa7c41d96ff04f6514db96103ec2287b"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765323165.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.323000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "384acc1950f5815398f79747e3f5e001"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765323857.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.462000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "51e9926bccb19798777f8000fe5635cb"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765462544.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.463000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a81cd6152b3cf0c659966fd5b2b9c485"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765463647.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.561000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f5ac2eb196831b6937a7e75656a3ad23"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765561239.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.562000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36e548f418cc6c70c9ef91f3523765e7"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765562200.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.654000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "adfd278d27ad807d526a30932e2bc11d"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765654414.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.659000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "7479b2b73e4e225784e8b098e9479691"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765659076.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.749000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36758ab08d320e75bb2c38aa80d3ab33"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765749824.5,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.750000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ba5bcd94c34aa67b5fbeea5198740fbe"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765750428.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.855000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "376b0d8bcb7c3e527a0c1824b0cddd6d"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765855913.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.856000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "90afeea3d97dee27dc597124faf2bbab"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765856592.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.974000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "fe5fe351f15c2f002287da26abdc3f46"}
+ {
+ "name": "CachingAutotuner.benchmark_all_configs",
+ "ts": 1722977765974828.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.977000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}]}, "frame_id": 5, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.978000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4a166952c3eac616029712f421ffc6f6"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765977996.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.978000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f367db4c90368f8adca1c5928e8f1ecd"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765978102.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 5, "frame_compile_id": 0, "attempt": 1, "has_payload": "355c5f9c92c9f13aaa98a227db5202aa"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self)
+ | | +- TYPE_MATCH: ___check_type_id(L['self'], 94206130500624)
+
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "82bed6465739fc263981b76f23f4da3f"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765987554.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2a804cb02fc3bf2d3e4e8933c6436465"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765987622.0,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.987000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "5/0", "frame_key": "6", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 445, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 6, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977765.977939, "entire_frame_compile_time_s": 0.009676456451416016, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/jjwu/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.005461215972900391, "has_guarded_code": true}, "frame_id": 5, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.988000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", 4]}
+V0806 13:56:05.988000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 458, "name": "wrapper", "filename": 4}]}, "frame_id": 6, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.988000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "07efc42e14dae367a419acb8428c4b5a"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765988338.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.988000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "8b6fb54af61bcb682883a1f983d168eb"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765988403.0,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 6, "frame_compile_id": 0, "attempt": 1, "has_payload": "82dd580f72d0e44676dd3f7c6f142b65"}
+
+ TREE_GUARD_MANAGER:
+ +- RootGuardManager
+ | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards
+ | +- GLOBAL_STATE: ___check_global_state()
+ | +- GuardManager: source=L['args'], accessed_by=DictGetItemGuardAccessor(args)
+ | | +- TYPE_MATCH: ___check_type_id(L['args'], 94206128741824)
+ | | +- LENGTH_CHECK: len(L['args']) == 1
+ | | +- GuardManager: source=L['args'][0], accessed_by=TupleGetItemGuardAccessor(0)
+ | | | +- ID_MATCH: ___check_obj_id(L['args'][0], 140561654732528)
+ | | | +- DictSubclassGuardManager: source=L['args'][0]._optimizer_step_pre_hooks, accessed_by=GetAttrGuardAccessor(_optimizer_step_pre_hooks)
+ | +- GuardManager: source=L['func'], accessed_by=DictGetItemGuardAccessor(func)
+ | | +- GuardManager: source=L['func'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
+ | | | +- ID_MATCH: ___check_obj_id(L['func'].__code__, 140563175560928)
+ | +- GuardManager: source=L['kwargs'], accessed_by=DictGetItemGuardAccessor(kwargs)
+ | | +- DICT_LENGTH: not L['kwargs']
+ | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
+ | | +- GuardManager: source=G['cast'], accessed_by=DictGetItemGuardAccessor(cast)
+ | | | +- ID_MATCH: ___check_obj_id(G['cast'], 140565182496976)
+ | | +- GuardManager: source=G['chain'], accessed_by=DictGetItemGuardAccessor(chain)
+ | | | +- ID_MATCH: ___check_obj_id(G['chain'], 94206128678976)
+ | | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor(torch)
+ | | | +- ID_MATCH: ___check_obj_id(G['torch'], 140565184683664)
+ | | | +- GuardManager: source=G['torch'].autograd, accessed_by=GetAttrGuardAccessor(autograd)
+ | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd, 140563304246272)
+ | | | | +- GuardManager: source=G['torch'].autograd.profiler, accessed_by=GetAttrGuardAccessor(profiler)
+ | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd.profiler, 140563303232784)
+ | | | | | +- GuardManager: source=G['torch'].autograd.profiler.record_function, accessed_by=GetAttrGuardAccessor(record_function)
+ | | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].autograd.profiler.record_function, 94206190338752)
+ | | +- GuardManager: source=G['Optimizer'], accessed_by=DictGetItemGuardAccessor(Optimizer)
+ | | | +- ID_MATCH: ___check_obj_id(G['Optimizer'], 94206202190960)
+ | | +- DictSubclassGuardManager: source=G['_global_optimizer_pre_hooks'], accessed_by=DictGetItemGuardAccessor(_global_optimizer_pre_hooks)
+
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0f43d692b85e14a4ad120ac1f7fc6fa0"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765998224.8,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "aa969914199ac5895a47db38f95b4122"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765998296.2,
+ "args": null,
+ "ph": "E",
+ "pid": 0
+ }
+V0806 13:56:05.998000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "6/0", "frame_key": "8", "co_name": "wrapper", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", "co_firstlineno": 458, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 21, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 0, "graph_input_count": 0, "start_time": 1722977765.9883175, "entire_frame_compile_time_s": 0.010008811950683594, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/jjwu/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.0030748844146728516, "has_guarded_code": true}, "frame_id": 6, "frame_compile_id": 0, "attempt": 1}
+V0806 13:56:05.999000 4107173 torch/_logging/structured.py:22] {"str": ["/data/users/jjwu/a/pytorch/torch/optim/adam.py", 5]}
+V0806 13:56:05.999000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "<module>", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 478, "name": "wrapper", "filename": 4}, {"line": 90, "name": "_use_grad", "filename": 4}, {"line": 197, "name": "step", "filename": 5}]}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:05.999000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "710e3b9f076a27c725bfcbb09fe8757e"}
+ {
+ "name": "_compile.compile_inner",
+ "ts": 1722977765999192.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:05.999000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "36c624efa1d29acebea4d1cbc6eefad3"}
+ {
+ "name": "entire_frame_compile",
+ "ts": 1722977765999253.8,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 1, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 0,
"grad": 1, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.002000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 0, "source": "L['self'].param_groups[0]['params'][0]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 2, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 3, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 3, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 2, "grad": 3, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.003000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 2, "source": "L['self'].param_groups[0]['params'][1]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 4, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 5, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 5, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 5, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 4, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 4, "grad": 5, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.004000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 4, "source": "L['self'].param_groups[0]['params'][2]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 6, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 7, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 7, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 7, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 6, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 6, "grad": 7, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.005000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 6, "source": "L['self'].param_groups[0]['params'][3]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 8, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 9, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 9, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 9, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 8, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 8, "grad": 9, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.006000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 8, "source": "L['self'].param_groups[0]['params'][4]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 10, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 11, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 11, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 11, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 10, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 10, "grad": 11, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 10, "source": "L['self'].param_groups[0]['params'][5]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 12, "describer_id": 312, "size": 2359296}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.007000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 13, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 13, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 13, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 12, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 12, "grad": 13, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 12, "source": "L['self'].param_groups[0]['params'][6]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 14, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 15, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.008000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 15, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 15, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 14, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 14, "grad": 15, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 14, "source": "L['self'].param_groups[0]['params'][7]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 16, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 17, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 17, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 17, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 16, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 16, "grad": 17, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.009000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 16, "source": "L['self'].param_groups[0]['params'][8]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 18, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 19, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 19, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 19, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 18, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 18, "grad": 19, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.010000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 18, "source": "L['self'].param_groups[0]['params'][9]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 20, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 21, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 21, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 21, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 20, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 20, "grad": 21, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.011000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 20, "source": "L['self'].param_groups[0]['params'][10]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 22, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 23, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 23, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": 
true, "stride": [1], "storage": 23, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 22, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 22, "grad": 23, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.012000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 22, "source": "L['self'].param_groups[0]['params'][11]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 24, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 25, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 25, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 25, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 24, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 24, "grad": 25, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 24, "source": "L['self'].param_groups[0]['params'][12]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.013000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 26, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 27, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 27, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 27, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 26, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 26, "grad": 27, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 26, "source": "L['self'].param_groups[0]['params'][13]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 28, "describer_id": 312, "size": 3072}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 29, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 29, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 29, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.014000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 28, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 28, "grad": 29, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 28, "source": "L['self'].param_groups[0]['params'][14]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 30, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 31, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 31, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 31, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 30, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 30, "grad": 31, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.015000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 30, "source": "L['self'].param_groups[0]['params'][15]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 32, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 33, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 33, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 33, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 32, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 32, "grad": 33, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.016000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 32, "source": "L['self'].param_groups[0]['params'][16]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 34, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 35, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 35, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 35, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 34, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 34, "grad": 35, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.017000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 34, "source": "L['self'].param_groups[0]['params'][17]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 36, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 37, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 37, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 37, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 36, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 36, "grad": 37, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.018000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 36, "source": "L['self'].param_groups[0]['params'][18]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 38, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 39, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 39, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 39, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 38, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 38, "grad": 39, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 38, "source": "L['self'].param_groups[0]['params'][19]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 40, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.019000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 41, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 41, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 41, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 40, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 40, "grad": 41, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 40, "source": "L['self'].param_groups[0]['params'][20]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 42, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 43, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.020000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 43, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 43, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 42, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 42, "grad": 43, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 42, "source": "L['self'].param_groups[0]['params'][21]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 44, "describer_id": 312, "size": 9437184}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 45, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 45, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 45, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 44, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 44, "grad": 45, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.021000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 44, "source": "L['self'].param_groups[0]['params'][22]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 46, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 47, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 47, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 47, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 46, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 46, "grad": 47, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.022000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 46, "source": "L['self'].param_groups[0]['params'][23]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 48, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 49, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 49, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 49, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 48, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 48, "grad": 
49, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.023000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 48, "source": "L['self'].param_groups[0]['params'][24]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 50, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 51, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 51, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 51, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 50, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 50, "grad": 51, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 50, "source": "L['self'].param_groups[0]['params'][25]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.024000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 52, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 53, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 53, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 53, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 52, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 52, "grad": 53, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 52, "source": "L['self'].param_groups[0]['params'][26]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 54, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 55, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.025000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 55, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, 
"stride": [1], "storage": 55, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 54, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 54, "grad": 55, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 54, "source": "L['self'].param_groups[0]['params'][27]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 56, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 57, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 57, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 57, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 56, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 56, "grad": 57, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.026000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 56, "source": "L['self'].param_groups[0]['params'][28]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 58, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 59, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 59, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 59, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 58, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 58, "grad": 59, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.027000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 58, "source": "L['self'].param_groups[0]['params'][29]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 60, "describer_id": 312, "size": 2359296}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 61, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 61, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 61, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 60, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 60, "grad": 61, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.028000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 60, "source": "L['self'].param_groups[0]['params'][30]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 62, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 63, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 63, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 63, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 62, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 62, "grad": 63, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.029000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 62, "source": "L['self'].param_groups[0]['params'][31]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 64, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 65, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 65, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 65, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 64, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 64, "grad": 65, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 64, "source": "L['self'].param_groups[0]['params'][32]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 66, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.030000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 67, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 67, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 67, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 66, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 66, "grad": 67, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 66, "source": "L['self'].param_groups[0]['params'][33]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 68, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 69, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 69, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 69, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.031000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 68, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 68, "grad": 69, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 68, "source": "L['self'].param_groups[0]['params'][34]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 70, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 71, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 71, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], 
"is_leaf": true, "stride": [1], "storage": 71, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 70, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 70, "grad": 71, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.032000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 70, "source": "L['self'].param_groups[0]['params'][35]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 72, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 73, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 73, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 73, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 72, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 72, "grad": 73, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.033000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 72, "source": "L['self'].param_groups[0]['params'][36]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 74, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 75, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 75, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 75, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 74, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 74, "grad": 75, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.034000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 74, "source": "L['self'].param_groups[0]['params'][37]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 76, "describer_id": 312, 
"size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 77, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 77, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 77, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 76, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 76, "grad": 77, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 76, "source": "L['self'].param_groups[0]['params'][38]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.035000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 78, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 79, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 79, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 79, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 78, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 78, "grad": 79, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 78, "source": "L['self'].param_groups[0]['params'][39]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 80, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 81, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.036000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 81, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 81, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 80, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 80, "grad": 81, 
"view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 80, "source": "L['self'].param_groups[0]['params'][40]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 82, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 83, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 83, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 83, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 82, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 82, "grad": 83, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.037000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 82, "source": "L['self'].param_groups[0]['params'][41]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 84, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 85, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 85, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 85, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 84, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 84, "grad": 85, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.038000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 84, "source": "L['self'].param_groups[0]['params'][42]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 86, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 87, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 87, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": 
[768], "is_leaf": true, "stride": [1], "storage": 87, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 86, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 86, "grad": 87, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.039000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 86, "source": "L['self'].param_groups[0]['params'][43]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 88, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 89, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 89, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 89, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 88, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 88, "grad": 89, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.040000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 88, "source": "L['self'].param_groups[0]['params'][44]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 90, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 91, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 91, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 91, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 90, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 90, "grad": 91, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 90, "source": "L['self'].param_groups[0]['params'][45]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 92, "describer_id": 312, "size": 9437184}, "frame_id": 
7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.041000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 93, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 93, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 93, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 92, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 92, "grad": 93, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 92, "source": "L['self'].param_groups[0]['params'][46]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 94, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 95, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 95, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 95, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.042000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 94, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 94, "grad": 95, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 94, "source": "L['self'].param_groups[0]['params'][47]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 96, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 97, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 97, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 97, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 96, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 96, "grad": 
97, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.043000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 96, "source": "L['self'].param_groups[0]['params'][48]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 98, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 99, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 99, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 99, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 98, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 98, "grad": 99, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.044000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 98, "source": "L['self'].param_groups[0]['params'][49]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 100, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 101, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 101, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 101, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 100, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 100, "grad": 101, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.045000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 100, "source": "L['self'].param_groups[0]['params'][50]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 102, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 103, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 103, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 103, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 102, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 102, "grad": 103, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 102, "source": "L['self'].param_groups[0]['params'][51]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.046000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 104, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 105, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 105, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 105, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 104, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 104, "grad": 105, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 104, "source": "L['self'].param_groups[0]['params'][52]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 106, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.047000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 107, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 107, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 107, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 106, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 106, "grad": 107, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 106, "source": "L['self'].param_groups[0]['params'][53]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 108, 
"describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 109, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 109, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 109, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 108, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 108, "grad": 109, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.048000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 108, "source": "L['self'].param_groups[0]['params'][54]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 110, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 111, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 111, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 111, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 110, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 110, "grad": 111, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.049000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 110, "source": "L['self'].param_groups[0]['params'][55]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 112, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 113, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 113, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 113, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 112, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, 
"stride": [1], "storage": 112, "grad": 113, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.050000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 112, "source": "L['self'].param_groups[0]['params'][56]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 114, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 115, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 115, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 115, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 114, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 114, "grad": 115, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 114, "source": "L['self'].param_groups[0]['params'][57]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.051000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 116, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 117, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 117, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 117, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 116, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 116, "grad": 117, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 116, "source": "L['self'].param_groups[0]['params'][58]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 118, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.052000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 119, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 119, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 119, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 118, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 118, "grad": 119, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 118, "source": "L['self'].param_groups[0]['params'][59]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 120, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 121, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.053000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 121, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 121, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 120, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 120, "grad": 121, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 120, "source": "L['self'].param_groups[0]['params'][60]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 122, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 123, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 123, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 123, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 122, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 122, "grad": 123, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.054000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 122, "source": "L['self'].param_groups[0]['params'][61]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 124, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 125, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 125, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 125, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 124, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 124, "grad": 125, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.055000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 124, "source": "L['self'].param_groups[0]['params'][62]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 126, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 127, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 127, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 127, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 126, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 126, "grad": 127, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.056000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 126, "source": "L['self'].param_groups[0]['params'][63]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 128, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 129, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 129, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 129, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 128, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 
768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 128, "grad": 129, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 128, "source": "L['self'].param_groups[0]['params'][64]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.057000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 130, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 131, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 131, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 131, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 130, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 130, "grad": 131, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 130, "source": "L['self'].param_groups[0]['params'][65]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 132, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.058000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 133, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 133, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 133, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 132, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 132, "grad": 133, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 132, "source": "L['self'].param_groups[0]['params'][66]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 134, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 135, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 135, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 135, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 134, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 134, "grad": 135, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.059000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 134, "source": "L['self'].param_groups[0]['params'][67]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 136, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 137, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 137, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 137, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 136, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 136, "grad": 137, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.060000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 136, "source": "L['self'].param_groups[0]['params'][68]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 138, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 139, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 139, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 139, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 138, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 138, "grad": 139, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.061000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 138, "source": "L['self'].param_groups[0]['params'][69]"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 140, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 141, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 141, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 141, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 140, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 140, "grad": 141, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.062000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 140, "source": "L['self'].param_groups[0]['params'][70]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 142, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 143, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 143, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 143, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 142, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 142, "grad": 143, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 142, "source": "L['self'].param_groups[0]['params'][71]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 144, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.063000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 145, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 145, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 145, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 
144, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 144, "grad": 145, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 144, "source": "L['self'].param_groups[0]['params'][72]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 146, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 147, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.064000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 147, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 147, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 146, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 146, "grad": 147, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 146, "source": "L['self'].param_groups[0]['params'][73]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 148, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 149, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 149, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 149, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 148, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 148, "grad": 149, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.065000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 148, "source": "L['self'].param_groups[0]['params'][74]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 150, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 151, "describer_id": 312, "size": 3072}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 151, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 151, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 150, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 150, "grad": 151, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.066000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 150, "source": "L['self'].param_groups[0]['params'][75]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 152, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 153, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 153, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 153, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 152, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 152, "grad": 153, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.067000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 152, "source": "L['self'].param_groups[0]['params'][76]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 154, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 155, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 155, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 155, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 154, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 154, "grad": 155, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 154, "source": "L['self'].param_groups[0]['params'][77]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.068000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 156, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 157, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 157, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 157, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 156, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 156, "grad": 157, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 156, "source": "L['self'].param_groups[0]['params'][78]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 158, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.069000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 159, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 159, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 159, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 158, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 158, "grad": 159, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 158, "source": "L['self'].param_groups[0]['params'][79]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 160, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 161, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 161, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 161, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 160, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 160, "grad": 161, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.070000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 160, "source": "L['self'].param_groups[0]['params'][80]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 162, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 163, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 163, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 163, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 162, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 162, "grad": 163, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.071000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 162, "source": "L['self'].param_groups[0]['params'][81]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 164, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 165, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 165, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 165, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 164, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 164, "grad": 165, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.072000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 164, "source": "L['self'].param_groups[0]['params'][82]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 166, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 167, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 167, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 167, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 166, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 166, "grad": 167, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.073000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 166, "source": "L['self'].param_groups[0]['params'][83]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 168, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 169, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 169, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 169, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 168, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 168, "grad": 169, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.074000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 168, "source": "L['self'].param_groups[0]['params'][84]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 170, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 171, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 171, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 171, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 170, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 170, "grad": 171, "view_func": "", "describer_id": 312}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.075000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 170, "source": "L['self'].param_groups[0]['params'][85]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 172, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 173, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 173, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 173, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 172, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 172, "grad": 173, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.076000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 172, "source": "L['self'].param_groups[0]['params'][86]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 174, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 175, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 175, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 175, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 174, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 174, "grad": 175, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 174, "source": "L['self'].param_groups[0]['params'][87]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.077000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 176, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 177, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 177, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], 
"storage": 177, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 176, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 176, "grad": 177, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 176, "source": "L['self'].param_groups[0]['params'][88]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 178, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.078000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 179, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 179, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 179, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 178, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 178, "grad": 179, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 178, "source": "L['self'].param_groups[0]['params'][89]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 180, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 181, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 181, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 181, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 180, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 180, "grad": 181, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.079000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 180, "source": "L['self'].param_groups[0]['params'][90]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 182, "describer_id": 312, "size": 
3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 183, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 183, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 183, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 182, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 182, "grad": 183, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.080000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 182, "source": "L['self'].param_groups[0]['params'][91]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 184, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 185, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 185, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 185, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.385000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 184, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 184, "grad": 185, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 184, "source": "L['self'].param_groups[0]['params'][92]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 186, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 187, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.386000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 187, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 187, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 186, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 186, "grad": 187, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 186, "source": "L['self'].param_groups[0]['params'][93]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 188, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 189, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 189, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 189, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 188, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 188, "grad": 189, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.387000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 188, "source": "L['self'].param_groups[0]['params'][94]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 190, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 191, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 191, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 191, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 190, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 190, "grad": 191, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.388000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 190, "source": "L['self'].param_groups[0]['params'][95]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 192, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 193, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 193, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', 
index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 193, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 192, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 192, "grad": 193, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.389000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 192, "source": "L['self'].param_groups[0]['params'][96]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 194, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 195, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 195, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 195, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 194, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 194, "grad": 195, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.390000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 194, "source": "L['self'].param_groups[0]['params'][97]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 196, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 197, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 197, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 197, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 196, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 196, "grad": 197, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.391000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 196, "source": "L['self'].param_groups[0]['params'][98]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 198, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 199, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 199, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 199, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 198, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 198, "grad": 199, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.392000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 198, "source": "L['self'].param_groups[0]['params'][99]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 200, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 201, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 201, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 201, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 200, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 200, "grad": 201, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 200, "source": "L['self'].param_groups[0]['params'][100]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.393000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 202, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 203, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 203, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 203, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 202, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, 
"requires_grad": true, "is_parameter": true, "stride": [1], "storage": 202, "grad": 203, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 202, "source": "L['self'].param_groups[0]['params'][101]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 204, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 205, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.394000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 205, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 205, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 204, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 204, "grad": 205, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 204, "source": "L['self'].param_groups[0]['params'][102]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 206, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 207, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 207, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 207, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 206, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 206, "grad": 207, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.395000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 206, "source": "L['self'].param_groups[0]['params'][103]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 208, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 209, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:410] 
{"describe_tensor": {"id": 209, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 209, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 208, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 208, "grad": 209, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.396000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 208, "source": "L['self'].param_groups[0]['params'][104]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 210, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 211, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 211, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 211, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 210, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 210, "grad": 211, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.397000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 210, "source": "L['self'].param_groups[0]['params'][105]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 212, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 213, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 213, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 213, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 212, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 212, "grad": 213, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.398000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 212, "source": "L['self'].param_groups[0]['params'][106]"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 214, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 215, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 215, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 215, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 214, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 214, "grad": 215, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 214, "source": "L['self'].param_groups[0]['params'][107]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.399000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 216, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 217, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 217, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 217, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 216, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 216, "grad": 217, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 216, "source": "L['self'].param_groups[0]['params'][108]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 218, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.400000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 219, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 219, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 219, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 218, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 218, "grad": 219, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 218, "source": "L['self'].param_groups[0]['params'][109]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 220, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 221, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 221, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 221, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.401000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 220, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 220, "grad": 221, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 220, "source": "L['self'].param_groups[0]['params'][110]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 222, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 223, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 223, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 223, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 222, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 222, "grad": 223, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.402000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 222, "source": "L['self'].param_groups[0]['params'][111]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 224, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 225, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} 
+V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 225, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 225, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 224, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 224, "grad": 225, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.403000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 224, "source": "L['self'].param_groups[0]['params'][112]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 226, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 227, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 227, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 227, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 226, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 226, "grad": 227, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.404000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 226, "source": "L['self'].param_groups[0]['params'][113]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 228, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 229, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 229, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 229, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 228, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 228, "grad": 229, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.405000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
228, "source": "L['self'].param_groups[0]['params'][114]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 230, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 231, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 231, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 231, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 230, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 230, "grad": 231, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 230, "source": "L['self'].param_groups[0]['params'][115]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 232, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.406000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 233, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 233, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 233, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 232, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 232, "grad": 233, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 232, "source": "L['self'].param_groups[0]['params'][116]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 234, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 235, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 235, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 235, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.407000 4107173 torch/_subclasses/meta_utils.py:410] 
{"describe_tensor": {"id": 234, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 234, "grad": 235, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 234, "source": "L['self'].param_groups[0]['params'][117]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 236, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 237, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 237, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 237, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 236, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 236, "grad": 237, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.408000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 236, "source": "L['self'].param_groups[0]['params'][118]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 238, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 239, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 239, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 239, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 238, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 238, "grad": 239, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.409000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 238, "source": "L['self'].param_groups[0]['params'][119]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 240, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 241, "describer_id": 
312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 241, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 241, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 240, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 240, "grad": 241, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.410000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 240, "source": "L['self'].param_groups[0]['params'][120]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 242, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 243, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 243, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 243, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 242, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 242, "grad": 243, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.411000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 242, "source": "L['self'].param_groups[0]['params'][121]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 244, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 245, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 245, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 245, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 244, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 244, "grad": 245, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:1633] 
{"describe_source": {"describer_id": 312, "id": 244, "source": "L['self'].param_groups[0]['params'][122]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.412000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 246, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 247, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 247, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 247, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 246, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 246, "grad": 247, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 246, "source": "L['self'].param_groups[0]['params'][123]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 248, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 249, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 249, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 249, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.413000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 248, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 248, "grad": 249, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 248, "source": "L['self'].param_groups[0]['params'][124]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 250, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 251, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 251, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 251, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 250, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 250, "grad": 251, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.414000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 250, "source": "L['self'].param_groups[0]['params'][125]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 252, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 253, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 253, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 253, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 252, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 252, "grad": 253, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.415000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 252, "source": "L['self'].param_groups[0]['params'][126]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 254, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 255, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 255, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 255, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 254, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 254, "grad": 255, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.416000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 254, "source": "L['self'].param_groups[0]['params'][127]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 256, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 257, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 257, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 257, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 256, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 256, "grad": 257, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.417000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 256, "source": "L['self'].param_groups[0]['params'][128]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 258, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 259, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 259, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 259, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 258, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 258, "grad": 259, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 258, "source": "L['self'].param_groups[0]['params'][129]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 260, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.418000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 261, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 261, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 261, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 260, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 260, "grad": 261, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 
0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 260, "source": "L['self'].param_groups[0]['params'][130]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 262, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 263, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.419000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 263, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 263, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 262, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 262, "grad": 263, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 262, "source": "L['self'].param_groups[0]['params'][131]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 264, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 265, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 265, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 265, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 264, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 264, "grad": 265, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.420000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 264, "source": "L['self'].param_groups[0]['params'][132]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 266, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 267, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 267, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], 
"storage": 267, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 266, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 266, "grad": 267, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.421000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 266, "source": "L['self'].param_groups[0]['params'][133]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 268, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 269, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 269, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 269, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 268, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 268, "grad": 269, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.422000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 268, "source": "L['self'].param_groups[0]['params'][134]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 270, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 271, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 271, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 271, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 270, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 270, "grad": 271, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.423000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 270, "source": "L['self'].param_groups[0]['params'][135]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 272, "describer_id": 312, "size": 7077888}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 273, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 273, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 273, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 272, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 272, "grad": 273, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 272, "source": "L['self'].param_groups[0]['params'][136]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.424000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 274, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 275, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 275, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 275, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 274, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 274, "grad": 275, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 274, "source": "L['self'].param_groups[0]['params'][137]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 276, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 277, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.425000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 277, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 277, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 276, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], 
"storage": 276, "grad": 277, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 276, "source": "L['self'].param_groups[0]['params'][138]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 278, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 279, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 279, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 279, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.426000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 278, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 278, "grad": 279, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 278, "source": "L['self'].param_groups[0]['params'][139]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 280, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 281, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 281, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 281, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 280, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 280, "grad": 281, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.427000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 280, "source": "L['self'].param_groups[0]['params'][140]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 282, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 283, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 283, "ndim": 1, "dtype": "torch.float32", "device": 
"device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 283, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 282, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 282, "grad": 283, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.428000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 282, "source": "L['self'].param_groups[0]['params'][141]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 284, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 285, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 285, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 285, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 284, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [768, 1], "storage": 284, "grad": 285, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.429000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 284, "source": "L['self'].param_groups[0]['params'][142]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 286, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 287, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 287, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 287, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 286, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 286, "grad": 287, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 286, "source": "L['self'].param_groups[0]['params'][143]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.430000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 288, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 289, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 289, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 289, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 288, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [3072, 1], "storage": 288, "grad": 289, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 288, "source": "L['self'].param_groups[0]['params'][144]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 290, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.431000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 291, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 291, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 291, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 290, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 290, "grad": 291, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 290, "source": "L['self'].param_groups[0]['params'][145]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 292, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 293, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 293, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 293, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 292, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", 
"size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 292, "grad": 293, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.432000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 292, "source": "L['self'].param_groups[0]['params'][146]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 294, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 295, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 295, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 295, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 294, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 294, "grad": 295, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.433000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 294, "source": "L['self'].param_groups[0]['params'][147]"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 296, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 296, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 296, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.440000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 296, "source": "L['self'].state[list(L['self'].state.keys())[1]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.441000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 297, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.441000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 297, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 297, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 297, "source": "L['self'].state[list(L['self'].state.keys())[1]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 298, "describer_id": 312, "size": 3145728}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 298, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 768], "is_leaf": true, "stride": [768, 1], "storage": 298, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.442000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 298, "source": "L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 299, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 299, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 299, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.473000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 299, "source": "L['self'].state[list(L['self'].state.keys())[0]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 300, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 300, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 300, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.474000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 300, "source": "L['self'].state[list(L['self'].state.keys())[2]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 301, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 301, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 301, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 301, "source": "L['self'].state[list(L['self'].state.keys())[3]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 302, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 302, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 302, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.475000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 302, 
"source": "L['self'].state[list(L['self'].state.keys())[4]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 303, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 303, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 303, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 303, "source": "L['self'].state[list(L['self'].state.keys())[5]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.476000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 304, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 304, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 304, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 304, "source": "L['self'].state[list(L['self'].state.keys())[6]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 305, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 305, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 305, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.477000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 305, "source": "L['self'].state[list(L['self'].state.keys())[7]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 306, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 306, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 306, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 306, "source": "L['self'].state[list(L['self'].state.keys())[8]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 307, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.478000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 307, "ndim": 1, "dtype": "torch.float32", 
"device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 307, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 307, "source": "L['self'].state[list(L['self'].state.keys())[9]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 308, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 308, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 308, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.479000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 308, "source": "L['self'].state[list(L['self'].state.keys())[10]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 309, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 309, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 309, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 309, "source": "L['self'].state[list(L['self'].state.keys())[11]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 310, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 310, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 310, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.480000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 310, "source": "L['self'].state[list(L['self'].state.keys())[12]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 311, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 311, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 311, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 311, "source": "L['self'].state[list(L['self'].state.keys())[13]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 312, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.481000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 312, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 312, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 312, "source": "L['self'].state[list(L['self'].state.keys())[14]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 313, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 313, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 313, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.482000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 313, "source": "L['self'].state[list(L['self'].state.keys())[15]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 314, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 314, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 314, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 314, "source": "L['self'].state[list(L['self'].state.keys())[16]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 315, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 315, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 315, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.483000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 315, "source": "L['self'].state[list(L['self'].state.keys())[17]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 316, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 316, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 
316, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 316, "source": "L['self'].state[list(L['self'].state.keys())[18]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.484000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 317, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 317, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 317, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 317, "source": "L['self'].state[list(L['self'].state.keys())[19]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 318, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 318, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 318, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.485000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 318, "source": "L['self'].state[list(L['self'].state.keys())[20]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 319, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 319, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 319, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 319, "source": "L['self'].state[list(L['self'].state.keys())[21]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 320, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 320, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 320, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.486000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 320, "source": "L['self'].state[list(L['self'].state.keys())[22]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 321, 
"describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 321, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 321, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 321, "source": "L['self'].state[list(L['self'].state.keys())[23]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 322, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.487000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 322, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 322, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 322, "source": "L['self'].state[list(L['self'].state.keys())[24]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 323, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 323, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 323, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.488000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 323, "source": "L['self'].state[list(L['self'].state.keys())[25]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 324, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 324, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 324, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 324, "source": "L['self'].state[list(L['self'].state.keys())[26]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 325, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 325, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 325, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.489000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 325, "source": "L['self'].state[list(L['self'].state.keys())[27]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 326, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 326, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 326, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 326, "source": "L['self'].state[list(L['self'].state.keys())[28]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.490000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 327, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 327, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 327, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 327, "source": "L['self'].state[list(L['self'].state.keys())[29]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 328, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 328, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 328, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.491000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 328, "source": "L['self'].state[list(L['self'].state.keys())[30]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 329, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 329, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 329, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 329, "source": "L['self'].state[list(L['self'].state.keys())[31]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 330, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 330, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 330, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.492000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 330, "source": "L['self'].state[list(L['self'].state.keys())[32]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 331, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 331, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 331, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 331, "source": "L['self'].state[list(L['self'].state.keys())[33]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 332, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 332, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 332, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.493000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 332, "source": "L['self'].state[list(L['self'].state.keys())[34]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 333, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 333, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 333, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.494000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 333, "source": "L['self'].state[list(L['self'].state.keys())[35]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 334, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 334, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 334, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 334, "source": 
"L['self'].state[list(L['self'].state.keys())[36]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 335, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 335, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 335, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.495000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 335, "source": "L['self'].state[list(L['self'].state.keys())[37]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 336, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 336, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 336, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 336, "source": "L['self'].state[list(L['self'].state.keys())[38]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 337, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.496000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 337, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 337, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 337, "source": "L['self'].state[list(L['self'].state.keys())[39]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 338, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 338, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 338, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.497000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 338, "source": "L['self'].state[list(L['self'].state.keys())[40]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 339, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 339, "ndim": 1, "dtype": "torch.float32", "device": 
"device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 339, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 339, "source": "L['self'].state[list(L['self'].state.keys())[41]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 340, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 340, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 340, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.498000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 340, "source": "L['self'].state[list(L['self'].state.keys())[42]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 341, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 341, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 341, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 341, "source": "L['self'].state[list(L['self'].state.keys())[43]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 342, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.499000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 342, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 342, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 342, "source": "L['self'].state[list(L['self'].state.keys())[44]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 343, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 343, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 343, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.500000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 343, "source": "L['self'].state[list(L['self'].state.keys())[45]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 344, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 344, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 344, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 344, "source": "L['self'].state[list(L['self'].state.keys())[46]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 345, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 345, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 345, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.501000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 345, "source": "L['self'].state[list(L['self'].state.keys())[47]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 346, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 346, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 346, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 346, "source": "L['self'].state[list(L['self'].state.keys())[48]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.502000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 347, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 347, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 347, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 347, "source": "L['self'].state[list(L['self'].state.keys())[49]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 348, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 348, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 348, "view_func": 
"", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.503000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 348, "source": "L['self'].state[list(L['self'].state.keys())[50]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 349, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 349, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 349, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 349, "source": "L['self'].state[list(L['self'].state.keys())[51]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 350, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 350, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 350, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.504000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 350, "source": "L['self'].state[list(L['self'].state.keys())[52]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 351, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 351, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 351, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 351, "source": "L['self'].state[list(L['self'].state.keys())[53]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 352, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.505000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 352, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 352, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 352, "source": "L['self'].state[list(L['self'].state.keys())[54]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 353, 
"describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 353, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 353, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.506000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 353, "source": "L['self'].state[list(L['self'].state.keys())[55]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 354, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 354, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 354, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 354, "source": "L['self'].state[list(L['self'].state.keys())[56]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 355, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 355, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 355, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.507000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 355, "source": "L['self'].state[list(L['self'].state.keys())[57]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 356, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 356, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 356, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 356, "source": "L['self'].state[list(L['self'].state.keys())[58]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 357, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 357, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 357, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.508000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 357, "source": "L['self'].state[list(L['self'].state.keys())[59]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 358, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 358, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 358, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 358, "source": "L['self'].state[list(L['self'].state.keys())[60]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.509000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 359, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 359, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 359, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 359, "source": "L['self'].state[list(L['self'].state.keys())[61]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 360, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 360, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 360, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.510000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 360, "source": "L['self'].state[list(L['self'].state.keys())[62]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 361, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 361, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 361, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 361, "source": "L['self'].state[list(L['self'].state.keys())[63]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 362, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 362, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 362, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.511000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 362, "source": "L['self'].state[list(L['self'].state.keys())[64]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 363, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 363, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 363, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 363, "source": "L['self'].state[list(L['self'].state.keys())[65]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.512000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 364, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 364, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 364, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 364, "source": "L['self'].state[list(L['self'].state.keys())[66]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 365, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 365, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 365, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.513000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 365, "source": "L['self'].state[list(L['self'].state.keys())[67]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 366, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 366, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 366, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 366, "source": 
"L['self'].state[list(L['self'].state.keys())[68]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 367, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 367, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 367, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.514000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 367, "source": "L['self'].state[list(L['self'].state.keys())[69]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 368, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 368, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 368, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 368, "source": "L['self'].state[list(L['self'].state.keys())[70]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.515000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 369, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 369, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 369, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 369, "source": "L['self'].state[list(L['self'].state.keys())[71]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 370, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 370, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 370, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.516000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 370, "source": "L['self'].state[list(L['self'].state.keys())[72]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 371, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 371, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 371, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 371, "source": "L['self'].state[list(L['self'].state.keys())[73]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 372, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 372, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 372, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.517000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 372, "source": "L['self'].state[list(L['self'].state.keys())[74]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 373, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 373, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 373, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 373, "source": "L['self'].state[list(L['self'].state.keys())[75]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.518000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 374, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 374, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 374, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 374, "source": "L['self'].state[list(L['self'].state.keys())[76]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 375, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 375, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 375, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.519000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 375, "source": "L['self'].state[list(L['self'].state.keys())[77]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 376, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 376, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 376, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 376, "source": "L['self'].state[list(L['self'].state.keys())[78]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 377, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 377, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 377, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.520000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 377, "source": "L['self'].state[list(L['self'].state.keys())[79]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 378, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 378, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 378, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 378, "source": "L['self'].state[list(L['self'].state.keys())[80]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.521000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 379, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 379, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 379, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 379, "source": "L['self'].state[list(L['self'].state.keys())[81]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 380, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 380, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 
380, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.522000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 380, "source": "L['self'].state[list(L['self'].state.keys())[82]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 381, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 381, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 381, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 381, "source": "L['self'].state[list(L['self'].state.keys())[83]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 382, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 382, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 382, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.523000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 382, "source": "L['self'].state[list(L['self'].state.keys())[84]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 383, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 383, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 383, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 383, "source": "L['self'].state[list(L['self'].state.keys())[85]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 384, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.524000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 384, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 384, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 384, "source": "L['self'].state[list(L['self'].state.keys())[86]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 385, 
"describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 385, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 385, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.525000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 385, "source": "L['self'].state[list(L['self'].state.keys())[87]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 386, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 386, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 386, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 386, "source": "L['self'].state[list(L['self'].state.keys())[88]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 387, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 387, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 387, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.526000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 387, "source": "L['self'].state[list(L['self'].state.keys())[89]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.527000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 388, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.527000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 388, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 388, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 388, "source": "L['self'].state[list(L['self'].state.keys())[90]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 389, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 389, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 389, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.528000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 389, "source": "L['self'].state[list(L['self'].state.keys())[91]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 390, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 390, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 390, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 390, "source": "L['self'].state[list(L['self'].state.keys())[92]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 391, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 391, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 391, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.529000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 391, "source": "L['self'].state[list(L['self'].state.keys())[93]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 392, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 392, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 392, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 392, "source": "L['self'].state[list(L['self'].state.keys())[94]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 393, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 393, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 393, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.530000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 393, "source": "L['self'].state[list(L['self'].state.keys())[95]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 394, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 394, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 394, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 394, "source": "L['self'].state[list(L['self'].state.keys())[96]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.531000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 395, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 395, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 395, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 395, "source": "L['self'].state[list(L['self'].state.keys())[97]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 396, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 396, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 396, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.532000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 396, "source": "L['self'].state[list(L['self'].state.keys())[98]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 397, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 397, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 397, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 397, "source": "L['self'].state[list(L['self'].state.keys())[99]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 398, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 398, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 398, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.533000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
398, "source": "L['self'].state[list(L['self'].state.keys())[100]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 399, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 399, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 399, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 399, "source": "L['self'].state[list(L['self'].state.keys())[101]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.534000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 400, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 400, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 400, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 400, "source": "L['self'].state[list(L['self'].state.keys())[102]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 401, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 401, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 401, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.535000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 401, "source": "L['self'].state[list(L['self'].state.keys())[103]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 402, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 402, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 402, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 402, "source": "L['self'].state[list(L['self'].state.keys())[104]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 403, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 403, "ndim": 1, "dtype": 
"torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 403, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.536000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 403, "source": "L['self'].state[list(L['self'].state.keys())[105]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 404, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 404, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 404, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 404, "source": "L['self'].state[list(L['self'].state.keys())[106]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 405, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.537000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 405, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 405, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 405, "source": "L['self'].state[list(L['self'].state.keys())[107]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 406, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 406, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 406, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.538000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 406, "source": "L['self'].state[list(L['self'].state.keys())[108]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 407, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 407, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 407, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 407, "source": "L['self'].state[list(L['self'].state.keys())[109]]['exp_avg']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 408, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 408, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 408, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.539000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 408, "source": "L['self'].state[list(L['self'].state.keys())[110]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 409, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 409, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 409, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 409, "source": "L['self'].state[list(L['self'].state.keys())[111]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 410, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 410, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 410, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.540000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 410, "source": "L['self'].state[list(L['self'].state.keys())[112]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 411, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 411, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 411, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.541000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 411, "source": "L['self'].state[list(L['self'].state.keys())[113]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 412, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 412, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, 
"stride": [768, 1], "storage": 412, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 412, "source": "L['self'].state[list(L['self'].state.keys())[114]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 413, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 413, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 413, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.542000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 413, "source": "L['self'].state[list(L['self'].state.keys())[115]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 414, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 414, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 414, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 414, "source": "L['self'].state[list(L['self'].state.keys())[116]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 415, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 415, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 415, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.543000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 415, "source": "L['self'].state[list(L['self'].state.keys())[117]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 416, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 416, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 416, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 416, "source": "L['self'].state[list(L['self'].state.keys())[118]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.544000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 417, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 417, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 417, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 417, "source": "L['self'].state[list(L['self'].state.keys())[119]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 418, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 418, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 418, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.545000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 418, "source": "L['self'].state[list(L['self'].state.keys())[120]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 419, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 419, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 419, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 419, "source": "L['self'].state[list(L['self'].state.keys())[121]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 420, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 420, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 420, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.546000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 420, "source": "L['self'].state[list(L['self'].state.keys())[122]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 421, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 421, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 421, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 421, "source": "L['self'].state[list(L['self'].state.keys())[123]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 422, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.547000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 422, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 422, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 422, "source": "L['self'].state[list(L['self'].state.keys())[124]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 423, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 423, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 423, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.548000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 423, "source": "L['self'].state[list(L['self'].state.keys())[125]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 424, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 424, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 424, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 424, "source": "L['self'].state[list(L['self'].state.keys())[126]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 425, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 425, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 425, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.549000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 425, "source": "L['self'].state[list(L['self'].state.keys())[127]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 426, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 426, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 426, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 426, "source": "L['self'].state[list(L['self'].state.keys())[128]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 427, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.550000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 427, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 427, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 427, "source": "L['self'].state[list(L['self'].state.keys())[129]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 428, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 428, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 428, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.551000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 428, "source": "L['self'].state[list(L['self'].state.keys())[130]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 429, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 429, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 429, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 429, "source": "L['self'].state[list(L['self'].state.keys())[131]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 430, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 430, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 430, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.552000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 430, "source": "L['self'].state[list(L['self'].state.keys())[132]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 431, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 431, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 431, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 431, "source": "L['self'].state[list(L['self'].state.keys())[133]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 432, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.553000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 432, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 432, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 432, "source": "L['self'].state[list(L['self'].state.keys())[134]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 433, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 433, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 433, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.554000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 433, "source": "L['self'].state[list(L['self'].state.keys())[135]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 434, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 434, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 434, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 434, "source": "L['self'].state[list(L['self'].state.keys())[136]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 435, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 435, 
"ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 435, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.555000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 435, "source": "L['self'].state[list(L['self'].state.keys())[137]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 436, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 436, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 436, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 436, "source": "L['self'].state[list(L['self'].state.keys())[138]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 437, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.556000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 437, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 437, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 437, "source": "L['self'].state[list(L['self'].state.keys())[139]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 438, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 438, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 438, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 438, "source": "L['self'].state[list(L['self'].state.keys())[140]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.557000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 439, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 439, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 439, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 439, "source": "L['self'].state[list(L['self'].state.keys())[141]]['exp_avg']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 440, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 440, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 440, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.558000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 440, "source": "L['self'].state[list(L['self'].state.keys())[142]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 441, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 441, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 441, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 441, "source": "L['self'].state[list(L['self'].state.keys())[143]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 442, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 442, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 442, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.559000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 442, "source": "L['self'].state[list(L['self'].state.keys())[144]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 443, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 443, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 443, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 443, "source": "L['self'].state[list(L['self'].state.keys())[145]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.560000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 444, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 444, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": 
true, "stride": [1], "storage": 444, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 444, "source": "L['self'].state[list(L['self'].state.keys())[146]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 445, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 445, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 445, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.561000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 445, "source": "L['self'].state[list(L['self'].state.keys())[147]]['exp_avg']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 446, "describer_id": 312, "size": 154533888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 446, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [50304, 768], "is_leaf": true, "stride": [768, 1], "storage": 446, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 446, "source": "L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 447, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 447, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 447, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.562000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 447, "source": "L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 448, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 448, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 448, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 448, "source": "L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.563000 4107173 torch/_subclasses/meta_utils.py:204] 
{"describe_storage": {"id": 449, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 449, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 449, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 449, "source": "L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 450, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 450, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 450, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.564000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 450, "source": "L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 451, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 451, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 451, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 451, "source": "L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 452, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 452, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 452, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.565000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 452, "source": "L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 453, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 453, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 453, "view_func": "", "describer_id": 312}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 453, "source": "L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.566000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 454, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 454, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 454, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 454, "source": "L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 455, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 455, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 455, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.567000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 455, "source": "L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 456, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 456, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 456, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 456, "source": "L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 457, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 457, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 457, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.568000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 457, "source": "L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 458, "describer_id": 312, "size": 3072}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 458, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 458, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 458, "source": "L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.569000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 459, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 459, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 459, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 459, "source": "L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 460, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 460, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 460, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.570000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 460, "source": "L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 461, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 461, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 461, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 461, "source": "L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 462, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 462, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 462, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.571000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 462, "source": "L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 463, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 463, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 463, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 463, "source": "L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 464, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.572000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 464, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 464, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 464, "source": "L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 465, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 465, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 465, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.573000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 465, "source": "L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 466, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 466, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 466, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 466, "source": "L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 467, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 467, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 467, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.574000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 467, "source": "L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 468, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 468, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 468, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 468, "source": "L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 469, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 469, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 469, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.575000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 469, "source": "L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 470, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 470, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 470, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 470, "source": "L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.576000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 471, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 471, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 471, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 471, 
"source": "L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 472, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 472, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 472, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.577000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 472, "source": "L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 473, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 473, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 473, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 473, "source": "L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 474, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 474, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 474, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.578000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 474, "source": "L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 475, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 475, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 475, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 475, "source": "L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.579000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 476, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 476, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 476, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 476, "source": "L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 477, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 477, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 477, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.580000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 477, "source": "L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 478, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 478, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 478, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 478, "source": "L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 479, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 479, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 479, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.581000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 479, "source": "L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 480, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 480, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 480, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 480, "source": "L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq']"}, "frame_id": 7, 
"frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.582000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 481, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 481, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 481, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 481, "source": "L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 482, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 482, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 482, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.583000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 482, "source": "L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 483, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 483, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 483, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 483, "source": "L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 484, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 484, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 484, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.584000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 484, "source": "L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 485, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 485, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": 
true, "stride": [768, 1], "storage": 485, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 485, "source": "L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 486, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.585000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 486, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 486, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 486, "source": "L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 487, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 487, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 487, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.586000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 487, "source": "L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 488, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 488, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 488, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 488, "source": "L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 489, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 489, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 489, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.587000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 489, "source": "L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 490, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 490, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 490, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 490, "source": "L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 491, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 491, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 491, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.588000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 491, "source": "L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 492, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 492, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 492, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 492, "source": "L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.589000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 493, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 493, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 493, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 493, "source": "L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 494, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 494, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 494, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.590000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 494, "source": "L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 495, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 495, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 495, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.591000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 495, "source": "L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 496, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 496, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 496, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 496, "source": "L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.592000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 497, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 497, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 497, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 497, "source": "L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 498, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 498, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 498, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.593000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 498, "source": "L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 499, "describer_id": 
312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 499, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 499, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 499, "source": "L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 500, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 500, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 500, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.594000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 500, "source": "L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 501, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 501, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 501, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 501, "source": "L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.595000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 502, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 502, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 502, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 502, "source": "L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 503, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 503, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 503, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.596000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 503, "source": "L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 504, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 504, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 504, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 504, "source": "L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 505, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 505, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 505, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.597000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 505, "source": "L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 506, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 506, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 506, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.598000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 506, "source": "L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 507, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 507, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 507, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 507, "source": "L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 508, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 
4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 508, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 508, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.599000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 508, "source": "L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 509, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 509, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 509, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 509, "source": "L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 510, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 510, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 510, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.600000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 510, "source": "L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 511, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 511, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 511, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 511, "source": "L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.601000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 512, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 512, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 512, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 
512, "source": "L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 513, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 513, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 513, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.602000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 513, "source": "L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 514, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 514, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 514, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 514, "source": "L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 515, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 515, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 515, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.603000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 515, "source": "L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 516, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 516, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 516, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 516, "source": "L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.604000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 517, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 517, "ndim": 2, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 517, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 517, "source": "L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 518, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 518, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 518, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.605000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 518, "source": "L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 519, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 519, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 519, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 519, "source": "L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 520, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 520, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 520, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.606000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 520, "source": "L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 521, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 521, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 521, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 521, "source": "L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq']"}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 522, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.607000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 522, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 522, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 522, "source": "L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 523, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 523, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 523, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.608000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 523, "source": "L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 524, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 524, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 524, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 524, "source": "L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 525, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 525, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 525, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.609000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 525, "source": "L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 526, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 526, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], 
"is_leaf": true, "stride": [1], "storage": 526, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 526, "source": "L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 527, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.610000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 527, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 527, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 527, "source": "L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 528, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 528, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 528, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.611000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 528, "source": "L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 529, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 529, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 529, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 529, "source": "L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 530, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 530, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 530, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.612000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 530, "source": "L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 531, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 531, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 531, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 531, "source": "L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 532, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 532, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 532, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.613000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 532, "source": "L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 533, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 533, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 533, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.614000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 533, "source": "L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 534, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 534, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 534, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 534, "source": "L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 535, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 535, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 535, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.615000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 535, "source": "L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 536, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 536, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 536, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 536, "source": "L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 537, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 537, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 537, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.616000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 537, "source": "L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 538, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 538, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 538, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 538, "source": "L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.617000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 539, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 539, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 539, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 539, "source": "L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 540, "describer_id": 
312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 540, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 540, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.618000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 540, "source": "L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 541, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 541, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 541, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 541, "source": "L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 542, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 542, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 542, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.619000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 542, "source": "L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 543, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 543, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 543, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 543, "source": "L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.620000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 544, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 544, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 544, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 544, "source": "L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 545, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 545, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 545, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.621000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 545, "source": "L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 546, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 546, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 546, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 546, "source": "L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 547, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 547, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 547, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.622000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 547, "source": "L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 548, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 548, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 548, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 548, "source": "L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 549, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.623000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 549, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 549, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 549, "source": "L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 550, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 550, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 550, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.624000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 550, "source": "L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 551, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 551, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 551, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 551, "source": "L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 552, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 552, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 552, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.625000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 552, "source": "L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 553, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 553, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 553, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": 
{"describer_id": 312, "id": 553, "source": "L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.626000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 554, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 554, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 554, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 554, "source": "L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 555, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 555, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 555, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.627000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 555, "source": "L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 556, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 556, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 556, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 556, "source": "L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 557, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 557, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 557, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.628000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 557, "source": "L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 558, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": 
{"id": 558, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 558, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 558, "source": "L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.629000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 559, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 559, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 559, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 559, "source": "L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 560, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 560, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 560, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.630000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 560, "source": "L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 561, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 561, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 561, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 561, "source": "L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 562, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 562, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 562, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.631000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 562, "source": 
"L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 563, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 563, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 563, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 563, "source": "L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.632000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 564, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 564, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 564, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 564, "source": "L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 565, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 565, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 565, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.633000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 565, "source": "L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 566, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 566, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 566, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 566, "source": "L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 567, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 567, "ndim": 1, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 567, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.634000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 567, "source": "L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 568, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 568, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 568, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 568, "source": "L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.635000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 569, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 569, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 569, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 569, "source": "L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 570, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 570, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 570, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.636000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 570, "source": "L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 571, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 571, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 571, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 571, "source": "L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq']"}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 572, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 572, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 572, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.637000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 572, "source": "L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 573, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 573, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 573, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 573, "source": "L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 574, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.638000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 574, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 574, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 574, "source": "L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 575, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 575, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 575, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.639000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 575, "source": "L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 576, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 576, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], 
"is_leaf": true, "stride": [1], "storage": 576, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 576, "source": "L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 577, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 577, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 577, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.640000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 577, "source": "L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 578, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 578, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 578, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 578, "source": "L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.641000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 579, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 579, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 579, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 579, "source": "L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 580, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 580, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 580, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.642000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 580, "source": "L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 581, "describer_id": 312, "size": 7077888}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 581, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304, 768], "is_leaf": true, "stride": [768, 1], "storage": 581, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 581, "source": "L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 582, "describer_id": 312, "size": 9216}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 582, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [2304], "is_leaf": true, "stride": [1], "storage": 582, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.643000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 582, "source": "L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 583, "describer_id": 312, "size": 2359296}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 583, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 768], "is_leaf": true, "stride": [768, 1], "storage": 583, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 583, "source": "L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 584, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 584, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 584, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.644000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 584, "source": "L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 585, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 585, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 585, "view_func": "", 
"describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.645000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 585, "source": "L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 586, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 586, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 586, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 586, "source": "L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 587, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 587, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072, 768], "is_leaf": true, "stride": [768, 1], "storage": 587, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.646000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 587, "source": "L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 588, "describer_id": 312, "size": 12288}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 588, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [3072], "is_leaf": true, "stride": [1], "storage": 588, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 588, "source": "L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 589, "describer_id": 312, "size": 9437184}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 589, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768, 3072], "is_leaf": true, "stride": [3072, 1], "storage": 589, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.647000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 589, "source": "L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 
590, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 590, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 590, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 590, "source": "L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.648000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 591, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 591, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 591, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 591, "source": "L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 592, "describer_id": 312, "size": 3072}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 592, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [768], "is_leaf": true, "stride": [1], "storage": 592, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.649000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 592, "source": "L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 593, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 593, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 593, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 593, "source": "L['self'].state[list(L['self'].state.keys())[0]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 594, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 594, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 594, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.650000 4107173 
torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 594, "source": "L['self'].state[list(L['self'].state.keys())[2]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 595, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 595, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 595, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 595, "source": "L['self'].state[list(L['self'].state.keys())[3]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 596, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 596, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 596, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.651000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 596, "source": "L['self'].state[list(L['self'].state.keys())[4]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 597, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 597, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 597, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 597, "source": "L['self'].state[list(L['self'].state.keys())[5]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 598, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.652000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 598, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 598, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 598, "source": "L['self'].state[list(L['self'].state.keys())[6]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 599, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 599, "ndim": 0, 
"dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 599, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 599, "source": "L['self'].state[list(L['self'].state.keys())[7]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.653000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 600, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 600, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 600, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 600, "source": "L['self'].state[list(L['self'].state.keys())[8]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 601, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 601, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 601, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 601, "source": "L['self'].state[list(L['self'].state.keys())[9]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.654000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 602, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 602, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 602, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 602, "source": "L['self'].state[list(L['self'].state.keys())[10]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 603, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 603, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 603, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.655000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 603, "source": "L['self'].state[list(L['self'].state.keys())[11]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 604, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 604, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 604, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.656000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 604, "source": "L['self'].state[list(L['self'].state.keys())[12]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 605, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 605, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 605, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 605, "source": "L['self'].state[list(L['self'].state.keys())[13]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 606, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 606, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 606, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.657000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 606, "source": "L['self'].state[list(L['self'].state.keys())[14]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 607, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 607, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 607, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 607, "source": "L['self'].state[list(L['self'].state.keys())[15]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.658000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 608, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 608, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 608, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 
4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 608, "source": "L['self'].state[list(L['self'].state.keys())[16]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 609, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 609, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 609, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 609, "source": "L['self'].state[list(L['self'].state.keys())[17]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.659000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 610, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 610, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 610, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 610, "source": "L['self'].state[list(L['self'].state.keys())[18]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 611, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 611, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 611, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.660000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 611, "source": "L['self'].state[list(L['self'].state.keys())[19]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 612, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 612, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 612, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 612, "source": "L['self'].state[list(L['self'].state.keys())[20]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 613, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 613, 
"ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 613, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.661000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 613, "source": "L['self'].state[list(L['self'].state.keys())[21]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 614, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 614, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 614, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 614, "source": "L['self'].state[list(L['self'].state.keys())[22]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 615, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 615, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 615, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.662000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 615, "source": "L['self'].state[list(L['self'].state.keys())[23]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 616, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 616, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 616, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 616, "source": "L['self'].state[list(L['self'].state.keys())[24]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 617, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 617, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 617, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.663000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 617, "source": "L['self'].state[list(L['self'].state.keys())[25]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.664000 4107173 
torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 618, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 618, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 618, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 618, "source": "L['self'].state[list(L['self'].state.keys())[26]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 619, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 619, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 619, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.664000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 619, "source": "L['self'].state[list(L['self'].state.keys())[27]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 620, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 620, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 620, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 620, "source": "L['self'].state[list(L['self'].state.keys())[28]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 621, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 621, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 621, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.665000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 621, "source": "L['self'].state[list(L['self'].state.keys())[29]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 622, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 622, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 622, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 622, "source": "L['self'].state[list(L['self'].state.keys())[30]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.666000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 623, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 623, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 623, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 623, "source": "L['self'].state[list(L['self'].state.keys())[31]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 624, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 624, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 624, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 624, "source": "L['self'].state[list(L['self'].state.keys())[32]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.667000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 625, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 625, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 625, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 625, "source": "L['self'].state[list(L['self'].state.keys())[33]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 626, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 626, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 626, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.668000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 626, "source": "L['self'].state[list(L['self'].state.keys())[34]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 627, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 627, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 627, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 627, "source": "L['self'].state[list(L['self'].state.keys())[35]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 628, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 628, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 628, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.669000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 628, "source": "L['self'].state[list(L['self'].state.keys())[36]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 629, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 629, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 629, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 629, "source": "L['self'].state[list(L['self'].state.keys())[37]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 630, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 630, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 630, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.670000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 630, "source": "L['self'].state[list(L['self'].state.keys())[38]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 631, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 631, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 631, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 631, "source": "L['self'].state[list(L['self'].state.keys())[39]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 632, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 632, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 632, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.671000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 632, "source": "L['self'].state[list(L['self'].state.keys())[40]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 633, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 633, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 633, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 633, "source": "L['self'].state[list(L['self'].state.keys())[41]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 634, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 634, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 634, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.672000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 634, "source": "L['self'].state[list(L['self'].state.keys())[42]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 635, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 635, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 635, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 635, "source": "L['self'].state[list(L['self'].state.keys())[43]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 636, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 636, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 636, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.673000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 636, "source": "L['self'].state[list(L['self'].state.keys())[44]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 637, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 637, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 637, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 637, "source": "L['self'].state[list(L['self'].state.keys())[45]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 638, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.674000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 638, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 638, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 638, "source": "L['self'].state[list(L['self'].state.keys())[46]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 639, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 639, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 639, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 639, "source": "L['self'].state[list(L['self'].state.keys())[47]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 640, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 640, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 640, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 640, "source": "L['self'].state[list(L['self'].state.keys())[48]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 641, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 641, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 641, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.676000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 641, "source": "L['self'].state[list(L['self'].state.keys())[49]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 642, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 642, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 642, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 642, "source": "L['self'].state[list(L['self'].state.keys())[50]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 643, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 643, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 643, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 643, "source": "L['self'].state[list(L['self'].state.keys())[51]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 644, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 644, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 644, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 644, "source": "L['self'].state[list(L['self'].state.keys())[52]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 645, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 645, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 645, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.678000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 645, "source": "L['self'].state[list(L['self'].state.keys())[53]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 646, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 646, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 646, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 646, "source": "L['self'].state[list(L['self'].state.keys())[54]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 647, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 647, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 647, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.679000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 647, "source": "L['self'].state[list(L['self'].state.keys())[55]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 648, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 648, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 648, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 648, "source": "L['self'].state[list(L['self'].state.keys())[56]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 649, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 649, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 649, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.680000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 649, "source": "L['self'].state[list(L['self'].state.keys())[57]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 650, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 650, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 650, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 650, "source": "L['self'].state[list(L['self'].state.keys())[58]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 651, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 651, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 651, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.681000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 651, "source": "L['self'].state[list(L['self'].state.keys())[59]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 652, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 652, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 652, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 652, "source": "L['self'].state[list(L['self'].state.keys())[60]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 653, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 653, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 653, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.682000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 653, "source": "L['self'].state[list(L['self'].state.keys())[61]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 654, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 654, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 654, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 654, "source": "L['self'].state[list(L['self'].state.keys())[62]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 655, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.683000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 655, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 655, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 655, "source": "L['self'].state[list(L['self'].state.keys())[63]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 656, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 656, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 656, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 656, "source": "L['self'].state[list(L['self'].state.keys())[64]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.684000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 657, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 657, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 657, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 657, "source": "L['self'].state[list(L['self'].state.keys())[65]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 658, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 658, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 658, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.685000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 658, "source": "L['self'].state[list(L['self'].state.keys())[66]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 659, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 659, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 659, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 659, "source": "L['self'].state[list(L['self'].state.keys())[67]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 660, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 660, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 660, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.686000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 660, "source": "L['self'].state[list(L['self'].state.keys())[68]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 661, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 661, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 661, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 661, "source": "L['self'].state[list(L['self'].state.keys())[69]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 662, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 662, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 662, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.687000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 662, "source": "L['self'].state[list(L['self'].state.keys())[70]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 663, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 663, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 663, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 663, "source": "L['self'].state[list(L['self'].state.keys())[71]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 664, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 664, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 664, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.688000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 664, "source": "L['self'].state[list(L['self'].state.keys())[72]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 665, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 665, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 665, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 665, "source": "L['self'].state[list(L['self'].state.keys())[73]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 666, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 666, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 666, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.689000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 666, "source": "L['self'].state[list(L['self'].state.keys())[74]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 667, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 667, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 667, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 667, "source": "L['self'].state[list(L['self'].state.keys())[75]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 668, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 668, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 668, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.690000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 668, "source": "L['self'].state[list(L['self'].state.keys())[76]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 669, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 669, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 669, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 669, "source": "L['self'].state[list(L['self'].state.keys())[77]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 670, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.691000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 670, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 670, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 670, "source": "L['self'].state[list(L['self'].state.keys())[78]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 671, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 671, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 671, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 671, "source": "L['self'].state[list(L['self'].state.keys())[79]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.692000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 672, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 672, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 672, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 672, "source": "L['self'].state[list(L['self'].state.keys())[80]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 673, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 673, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 673, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 673, "source": "L['self'].state[list(L['self'].state.keys())[81]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.693000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 674, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 674, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 674, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 674, "source": "L['self'].state[list(L['self'].state.keys())[82]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 675, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 675, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 675, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.694000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 675, "source": "L['self'].state[list(L['self'].state.keys())[83]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 676, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 676, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 676, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 676, "source": "L['self'].state[list(L['self'].state.keys())[84]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 677, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 677, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 677, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.695000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 677, "source": "L['self'].state[list(L['self'].state.keys())[85]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 678, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 678, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 678, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 678, "source": "L['self'].state[list(L['self'].state.keys())[86]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 679, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 679, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 679, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.696000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 679, "source": "L['self'].state[list(L['self'].state.keys())[87]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 680, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 680, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 680, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 680, "source": "L['self'].state[list(L['self'].state.keys())[88]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 681, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 681, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 681, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.697000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 681, "source": "L['self'].state[list(L['self'].state.keys())[89]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 682, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 682, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 682, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 682, "source": "L['self'].state[list(L['self'].state.keys())[90]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 683, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 683, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 683, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.698000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 683, "source": "L['self'].state[list(L['self'].state.keys())[91]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 684, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 684, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 684, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 684, "source": "L['self'].state[list(L['self'].state.keys())[92]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 685, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.699000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 685, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 685, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 685, "source": "L['self'].state[list(L['self'].state.keys())[93]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 686, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 686, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 686, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 686, "source": "L['self'].state[list(L['self'].state.keys())[94]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.700000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 687, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 687, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 687, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 687, "source": "L['self'].state[list(L['self'].state.keys())[95]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 688, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 688, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 688, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 688, "source": "L['self'].state[list(L['self'].state.keys())[96]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.701000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 689, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 689, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 689, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 689, "source": "L['self'].state[list(L['self'].state.keys())[97]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 690, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 690, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 690, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.702000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 690, "source": "L['self'].state[list(L['self'].state.keys())[98]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 691, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 691, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 691, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 691, "source": "L['self'].state[list(L['self'].state.keys())[99]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 692, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 692, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 692, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.703000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 692, "source": "L['self'].state[list(L['self'].state.keys())[100]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 693, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 693, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 693, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 693, "source": "L['self'].state[list(L['self'].state.keys())[101]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 694, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 694, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 694, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.704000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 694, "source": "L['self'].state[list(L['self'].state.keys())[102]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 695, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 695, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 695, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 695, "source": "L['self'].state[list(L['self'].state.keys())[103]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 696, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 696, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 696, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.705000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 696, "source": "L['self'].state[list(L['self'].state.keys())[104]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 697, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 697, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 697, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 697, "source": "L['self'].state[list(L['self'].state.keys())[105]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 698, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 698, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 698, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.706000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 698, "source": "L['self'].state[list(L['self'].state.keys())[106]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 699, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 699, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 699, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 699, "source": "L['self'].state[list(L['self'].state.keys())[107]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 700, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 700, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 700, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.707000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 700, "source": "L['self'].state[list(L['self'].state.keys())[108]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 701, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 701, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 701, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 701, "source": "L['self'].state[list(L['self'].state.keys())[109]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 702, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 702, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 702, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.708000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 702, "source": "L['self'].state[list(L['self'].state.keys())[110]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 703, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 703, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 703, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 703, "source": "L['self'].state[list(L['self'].state.keys())[111]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.709000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 704, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 704, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 704, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 704, "source": "L['self'].state[list(L['self'].state.keys())[112]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 705, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 705, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 705, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 705, "source": "L['self'].state[list(L['self'].state.keys())[113]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.710000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 706, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 706, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 706, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 706, "source": "L['self'].state[list(L['self'].state.keys())[114]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 707, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 707, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 707, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.711000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 707, "source": "L['self'].state[list(L['self'].state.keys())[115]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 708, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 708, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 708, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 708, "source": "L['self'].state[list(L['self'].state.keys())[116]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 709, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 709, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 709, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.712000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 709, "source": "L['self'].state[list(L['self'].state.keys())[117]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 710, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 710, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 710, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 710, "source": "L['self'].state[list(L['self'].state.keys())[118]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 711, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 711, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 711, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.713000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 711, "source": "L['self'].state[list(L['self'].state.keys())[119]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 712, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 712, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 712, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.714000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 712, "source": "L['self'].state[list(L['self'].state.keys())[120]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 713, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 713, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 713, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 713, "source": "L['self'].state[list(L['self'].state.keys())[121]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 714, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 714, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 714, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.715000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 714, "source": "L['self'].state[list(L['self'].state.keys())[122]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 715, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 715, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 715, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0}
+V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 715, "source": "L['self'].state[list(L['self'].state.keys())[123]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 
0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 716, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 716, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 716, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.716000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 716, "source": "L['self'].state[list(L['self'].state.keys())[124]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 717, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 717, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 717, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 717, "source": "L['self'].state[list(L['self'].state.keys())[125]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 718, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 718, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 718, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.717000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 718, "source": "L['self'].state[list(L['self'].state.keys())[126]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 719, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 719, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 719, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 719, "source": "L['self'].state[list(L['self'].state.keys())[127]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 720, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 720, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 720, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, 
"attempt": 0} +V0806 13:56:06.718000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 720, "source": "L['self'].state[list(L['self'].state.keys())[128]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 721, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 721, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 721, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 721, "source": "L['self'].state[list(L['self'].state.keys())[129]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.719000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 722, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 722, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 722, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 722, "source": "L['self'].state[list(L['self'].state.keys())[130]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 723, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 723, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 723, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 723, "source": "L['self'].state[list(L['self'].state.keys())[131]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.720000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 724, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 724, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 724, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 724, "source": "L['self'].state[list(L['self'].state.keys())[132]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 725, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 
torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 725, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 725, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.721000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 725, "source": "L['self'].state[list(L['self'].state.keys())[133]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 726, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 726, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 726, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 726, "source": "L['self'].state[list(L['self'].state.keys())[134]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 727, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 727, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 727, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.722000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 727, "source": "L['self'].state[list(L['self'].state.keys())[135]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 728, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 728, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 728, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 728, "source": "L['self'].state[list(L['self'].state.keys())[136]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 729, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 729, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 729, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.723000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 729, "source": "L['self'].state[list(L['self'].state.keys())[137]]['step']"}, "frame_id": 
7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 730, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 730, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 730, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 730, "source": "L['self'].state[list(L['self'].state.keys())[138]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 731, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 731, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 731, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.724000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 731, "source": "L['self'].state[list(L['self'].state.keys())[139]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 732, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 732, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 732, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 732, "source": "L['self'].state[list(L['self'].state.keys())[140]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 733, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 733, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 733, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.725000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 733, "source": "L['self'].state[list(L['self'].state.keys())[141]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 734, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 734, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 734, "view_func": "", "describer_id": 312}, 
"frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 734, "source": "L['self'].state[list(L['self'].state.keys())[142]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 735, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 735, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 735, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.726000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 735, "source": "L['self'].state[list(L['self'].state.keys())[143]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 736, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 736, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 736, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 736, "source": "L['self'].state[list(L['self'].state.keys())[144]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 737, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.727000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 737, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 737, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 737, "source": "L['self'].state[list(L['self'].state.keys())[145]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 738, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 738, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 738, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 738, "source": "L['self'].state[list(L['self'].state.keys())[146]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 739, "describer_id": 312, "size": 4}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 
13:56:06.728000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 739, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "is_leaf": true, "stride": [], "storage": 739, "view_func": "", "describer_id": 312}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:06.729000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 312, "id": 739, "source": "L['self'].state[list(L['self'].state.keys())[147]]['step']"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:07.381000 4107173 torch/_dynamo/output_graph.py:1337] {"dynamo_output_graph": {"sizes": {"l_self_param_groups_0_params_0_": [50304, 768], "l_self_param_groups_0_params_1_": [1024, 768], "l_self_param_groups_0_params_2_": [768], "l_self_param_groups_0_params_3_": [768], "l_self_param_groups_0_params_4_": [2304, 768], "l_self_param_groups_0_params_5_": [2304], "l_self_param_groups_0_params_6_": [768, 768], "l_self_param_groups_0_params_7_": [768], "l_self_param_groups_0_params_8_": [768], "l_self_param_groups_0_params_9_": [768], "l_self_param_groups_0_params_10_": [3072, 768], "l_self_param_groups_0_params_11_": [3072], "l_self_param_groups_0_params_12_": [768, 3072], "l_self_param_groups_0_params_13_": [768], "l_self_param_groups_0_params_14_": [768], "l_self_param_groups_0_params_15_": [768], "l_self_param_groups_0_params_16_": [2304, 768], "l_self_param_groups_0_params_17_": [2304], "l_self_param_groups_0_params_18_": [768, 768], "l_self_param_groups_0_params_19_": [768], "l_self_param_groups_0_params_20_": [768], "l_self_param_groups_0_params_21_": [768], "l_self_param_groups_0_params_22_": [3072, 768], "l_self_param_groups_0_params_23_": [3072], "l_self_param_groups_0_params_24_": [768, 3072], "l_self_param_groups_0_params_25_": [768], "l_self_param_groups_0_params_26_": [768], "l_self_param_groups_0_params_27_": [768], "l_self_param_groups_0_params_28_": [2304, 768], "l_self_param_groups_0_params_29_": [2304], "l_self_param_groups_0_params_30_": [768, 768], "l_self_param_groups_0_params_31_": [768], "l_self_param_groups_0_params_32_": [768], "l_self_param_groups_0_params_33_": [768], "l_self_param_groups_0_params_34_": [3072, 768], "l_self_param_groups_0_params_35_": [3072], "l_self_param_groups_0_params_36_": [768, 3072], "l_self_param_groups_0_params_37_": [768], "l_self_param_groups_0_params_38_": [768], "l_self_param_groups_0_params_39_": [768], "l_self_param_groups_0_params_40_": [2304, 768], "l_self_param_groups_0_params_41_": [2304], "l_self_param_groups_0_params_42_": [768, 768], "l_self_param_groups_0_params_43_": [768], "l_self_param_groups_0_params_44_": [768], "l_self_param_groups_0_params_45_": [768], "l_self_param_groups_0_params_46_": [3072, 768], "l_self_param_groups_0_params_47_": [3072], "l_self_param_groups_0_params_48_": [768, 3072], "l_self_param_groups_0_params_49_": [768], "l_self_param_groups_0_params_50_": [768], "l_self_param_groups_0_params_51_": [768], "l_self_param_groups_0_params_52_": [2304, 768], "l_self_param_groups_0_params_53_": [2304], "l_self_param_groups_0_params_54_": [768, 768], "l_self_param_groups_0_params_55_": [768], "l_self_param_groups_0_params_56_": [768], "l_self_param_groups_0_params_57_": [768], "l_self_param_groups_0_params_58_": [3072, 768], "l_self_param_groups_0_params_59_": [3072], "l_self_param_groups_0_params_60_": [768, 3072], "l_self_param_groups_0_params_61_": [768], "l_self_param_groups_0_params_62_": [768], "l_self_param_groups_0_params_63_": [768], 
"l_self_param_groups_0_params_64_": [2304, 768], "l_self_param_groups_0_params_65_": [2304], "l_self_param_groups_0_params_66_": [768, 768], "l_self_param_groups_0_params_67_": [768], "l_self_param_groups_0_params_68_": [768], "l_self_param_groups_0_params_69_": [768], "l_self_param_groups_0_params_70_": [3072, 768], "l_self_param_groups_0_params_71_": [3072], "l_self_param_groups_0_params_72_": [768, 3072], "l_self_param_groups_0_params_73_": [768], "l_self_param_groups_0_params_74_": [768], "l_self_param_groups_0_params_75_": [768], "l_self_param_groups_0_params_76_": [2304, 768], "l_self_param_groups_0_params_77_": [2304], "l_self_param_groups_0_params_78_": [768, 768], "l_self_param_groups_0_params_79_": [768], "l_self_param_groups_0_params_80_": [768], "l_self_param_groups_0_params_81_": [768], "l_self_param_groups_0_params_82_": [3072, 768], "l_self_param_groups_0_params_83_": [3072], "l_self_param_groups_0_params_84_": [768, 3072], "l_self_param_groups_0_params_85_": [768], "l_self_param_groups_0_params_86_": [768], "l_self_param_groups_0_params_87_": [768], "l_self_param_groups_0_params_88_": [2304, 768], "l_self_param_groups_0_params_89_": [2304], "l_self_param_groups_0_params_90_": [768, 768], "l_self_param_groups_0_params_91_": [768], "l_self_param_groups_0_params_92_": [768], "l_self_param_groups_0_params_93_": [768], "l_self_param_groups_0_params_94_": [3072, 768], "l_self_param_groups_0_params_95_": [3072], "l_self_param_groups_0_params_96_": [768, 3072], "l_self_param_groups_0_params_97_": [768], "l_self_param_groups_0_params_98_": [768], "l_self_param_groups_0_params_99_": [768], "l_self_param_groups_0_params_100_": [2304, 768], "l_self_param_groups_0_params_101_": [2304], "l_self_param_groups_0_params_102_": [768, 768], "l_self_param_groups_0_params_103_": [768], "l_self_param_groups_0_params_104_": [768], "l_self_param_groups_0_params_105_": [768], "l_self_param_groups_0_params_106_": [3072, 768], "l_self_param_groups_0_params_107_": [3072], "l_self_param_groups_0_params_108_": [768, 3072], "l_self_param_groups_0_params_109_": [768], "l_self_param_groups_0_params_110_": [768], "l_self_param_groups_0_params_111_": [768], "l_self_param_groups_0_params_112_": [2304, 768], "l_self_param_groups_0_params_113_": [2304], "l_self_param_groups_0_params_114_": [768, 768], "l_self_param_groups_0_params_115_": [768], "l_self_param_groups_0_params_116_": [768], "l_self_param_groups_0_params_117_": [768], "l_self_param_groups_0_params_118_": [3072, 768], "l_self_param_groups_0_params_119_": [3072], "l_self_param_groups_0_params_120_": [768, 3072], "l_self_param_groups_0_params_121_": [768], "l_self_param_groups_0_params_122_": [768], "l_self_param_groups_0_params_123_": [768], "l_self_param_groups_0_params_124_": [2304, 768], "l_self_param_groups_0_params_125_": [2304], "l_self_param_groups_0_params_126_": [768, 768], "l_self_param_groups_0_params_127_": [768], "l_self_param_groups_0_params_128_": [768], "l_self_param_groups_0_params_129_": [768], "l_self_param_groups_0_params_130_": [3072, 768], "l_self_param_groups_0_params_131_": [3072], "l_self_param_groups_0_params_132_": [768, 3072], "l_self_param_groups_0_params_133_": [768], "l_self_param_groups_0_params_134_": [768], "l_self_param_groups_0_params_135_": [768], "l_self_param_groups_0_params_136_": [2304, 768], "l_self_param_groups_0_params_137_": [2304], "l_self_param_groups_0_params_138_": [768, 768], "l_self_param_groups_0_params_139_": [768], "l_self_param_groups_0_params_140_": [768], "l_self_param_groups_0_params_141_": 
[768], "l_self_param_groups_0_params_142_": [3072, 768], "l_self_param_groups_0_params_143_": [3072], "l_self_param_groups_0_params_144_": [768, 3072], "l_self_param_groups_0_params_145_": [768], "l_self_param_groups_0_params_146_": [768], "l_self_param_groups_0_params_147_": [768], "l_self_state_list_l_self_state_keys_1_step_": [], "l_self_state_list_l_self_state_keys_1_exp_avg_": [1024, 768], "l_self_state_list_l_self_state_keys_1_exp_avg_sq_": [1024, 768], "l_self_param_groups_0_params_0_grad": [50304, 768], "l_self_param_groups_0_params_1_grad": [1024, 768], "l_self_param_groups_0_params_2_grad": [768], "l_self_param_groups_0_params_3_grad": [768], "l_self_param_groups_0_params_4_grad": [2304, 768], "l_self_param_groups_0_params_5_grad": [2304], "l_self_param_groups_0_params_6_grad": [768, 768], "l_self_param_groups_0_params_7_grad": [768], "l_self_param_groups_0_params_8_grad": [768], "l_self_param_groups_0_params_9_grad": [768], "l_self_param_groups_0_params_10_grad": [3072, 768], "l_self_param_groups_0_params_11_grad": [3072], "l_self_param_groups_0_params_12_grad": [768, 3072], "l_self_param_groups_0_params_13_grad": [768], "l_self_param_groups_0_params_14_grad": [768], "l_self_param_groups_0_params_15_grad": [768], "l_self_param_groups_0_params_16_grad": [2304, 768], "l_self_param_groups_0_params_17_grad": [2304], "l_self_param_groups_0_params_18_grad": [768, 768], "l_self_param_groups_0_params_19_grad": [768], "l_self_param_groups_0_params_20_grad": [768], "l_self_param_groups_0_params_21_grad": [768], "l_self_param_groups_0_params_22_grad": [3072, 768], "l_self_param_groups_0_params_23_grad": [3072], "l_self_param_groups_0_params_24_grad": [768, 3072], "l_self_param_groups_0_params_25_grad": [768], "l_self_param_groups_0_params_26_grad": [768], "l_self_param_groups_0_params_27_grad": [768], "l_self_param_groups_0_params_28_grad": [2304, 768], "l_self_param_groups_0_params_29_grad": [2304], "l_self_param_groups_0_params_30_grad": [768, 768], "l_self_param_groups_0_params_31_grad": [768], "l_self_param_groups_0_params_32_grad": [768], "l_self_param_groups_0_params_33_grad": [768], "l_self_param_groups_0_params_34_grad": [3072, 768], "l_self_param_groups_0_params_35_grad": [3072], "l_self_param_groups_0_params_36_grad": [768, 3072], "l_self_param_groups_0_params_37_grad": [768], "l_self_param_groups_0_params_38_grad": [768], "l_self_param_groups_0_params_39_grad": [768], "l_self_param_groups_0_params_40_grad": [2304, 768], "l_self_param_groups_0_params_41_grad": [2304], "l_self_param_groups_0_params_42_grad": [768, 768], "l_self_param_groups_0_params_43_grad": [768], "l_self_param_groups_0_params_44_grad": [768], "l_self_param_groups_0_params_45_grad": [768], "l_self_param_groups_0_params_46_grad": [3072, 768], "l_self_param_groups_0_params_47_grad": [3072], "l_self_param_groups_0_params_48_grad": [768, 3072], "l_self_param_groups_0_params_49_grad": [768], "l_self_param_groups_0_params_50_grad": [768], "l_self_param_groups_0_params_51_grad": [768], "l_self_param_groups_0_params_52_grad": [2304, 768], "l_self_param_groups_0_params_53_grad": [2304], "l_self_param_groups_0_params_54_grad": [768, 768], "l_self_param_groups_0_params_55_grad": [768], "l_self_param_groups_0_params_56_grad": [768], "l_self_param_groups_0_params_57_grad": [768], "l_self_param_groups_0_params_58_grad": [3072, 768], "l_self_param_groups_0_params_59_grad": [3072], "l_self_param_groups_0_params_60_grad": [768, 3072], "l_self_param_groups_0_params_61_grad": [768], "l_self_param_groups_0_params_62_grad": [768], 
"l_self_param_groups_0_params_63_grad": [768], "l_self_param_groups_0_params_64_grad": [2304, 768], "l_self_param_groups_0_params_65_grad": [2304], "l_self_param_groups_0_params_66_grad": [768, 768], "l_self_param_groups_0_params_67_grad": [768], "l_self_param_groups_0_params_68_grad": [768], "l_self_param_groups_0_params_69_grad": [768], "l_self_param_groups_0_params_70_grad": [3072, 768], "l_self_param_groups_0_params_71_grad": [3072], "l_self_param_groups_0_params_72_grad": [768, 3072], "l_self_param_groups_0_params_73_grad": [768], "l_self_param_groups_0_params_74_grad": [768], "l_self_param_groups_0_params_75_grad": [768], "l_self_param_groups_0_params_76_grad": [2304, 768], "l_self_param_groups_0_params_77_grad": [2304], "l_self_param_groups_0_params_78_grad": [768, 768], "l_self_param_groups_0_params_79_grad": [768], "l_self_param_groups_0_params_80_grad": [768], "l_self_param_groups_0_params_81_grad": [768], "l_self_param_groups_0_params_82_grad": [3072, 768], "l_self_param_groups_0_params_83_grad": [3072], "l_self_param_groups_0_params_84_grad": [768, 3072], "l_self_param_groups_0_params_85_grad": [768], "l_self_param_groups_0_params_86_grad": [768], "l_self_param_groups_0_params_87_grad": [768], "l_self_param_groups_0_params_88_grad": [2304, 768], "l_self_param_groups_0_params_89_grad": [2304], "l_self_param_groups_0_params_90_grad": [768, 768], "l_self_param_groups_0_params_91_grad": [768], "l_self_param_groups_0_params_92_grad": [768], "l_self_param_groups_0_params_93_grad": [768], "l_self_param_groups_0_params_94_grad": [3072, 768], "l_self_param_groups_0_params_95_grad": [3072], "l_self_param_groups_0_params_96_grad": [768, 3072], "l_self_param_groups_0_params_97_grad": [768], "l_self_param_groups_0_params_98_grad": [768], "l_self_param_groups_0_params_99_grad": [768], "l_self_param_groups_0_params_100_grad": [2304, 768], "l_self_param_groups_0_params_101_grad": [2304], "l_self_param_groups_0_params_102_grad": [768, 768], "l_self_param_groups_0_params_103_grad": [768], "l_self_param_groups_0_params_104_grad": [768], "l_self_param_groups_0_params_105_grad": [768], "l_self_param_groups_0_params_106_grad": [3072, 768], "l_self_param_groups_0_params_107_grad": [3072], "l_self_param_groups_0_params_108_grad": [768, 3072], "l_self_param_groups_0_params_109_grad": [768], "l_self_param_groups_0_params_110_grad": [768], "l_self_param_groups_0_params_111_grad": [768], "l_self_param_groups_0_params_112_grad": [2304, 768], "l_self_param_groups_0_params_113_grad": [2304], "l_self_param_groups_0_params_114_grad": [768, 768], "l_self_param_groups_0_params_115_grad": [768], "l_self_param_groups_0_params_116_grad": [768], "l_self_param_groups_0_params_117_grad": [768], "l_self_param_groups_0_params_118_grad": [3072, 768], "l_self_param_groups_0_params_119_grad": [3072], "l_self_param_groups_0_params_120_grad": [768, 3072], "l_self_param_groups_0_params_121_grad": [768], "l_self_param_groups_0_params_122_grad": [768], "l_self_param_groups_0_params_123_grad": [768], "l_self_param_groups_0_params_124_grad": [2304, 768], "l_self_param_groups_0_params_125_grad": [2304], "l_self_param_groups_0_params_126_grad": [768, 768], "l_self_param_groups_0_params_127_grad": [768], "l_self_param_groups_0_params_128_grad": [768], "l_self_param_groups_0_params_129_grad": [768], "l_self_param_groups_0_params_130_grad": [3072, 768], "l_self_param_groups_0_params_131_grad": [3072], "l_self_param_groups_0_params_132_grad": [768, 3072], "l_self_param_groups_0_params_133_grad": [768], 
"l_self_param_groups_0_params_134_grad": [768], "l_self_param_groups_0_params_135_grad": [768], "l_self_param_groups_0_params_136_grad": [2304, 768], "l_self_param_groups_0_params_137_grad": [2304], "l_self_param_groups_0_params_138_grad": [768, 768], "l_self_param_groups_0_params_139_grad": [768], "l_self_param_groups_0_params_140_grad": [768], "l_self_param_groups_0_params_141_grad": [768], "l_self_param_groups_0_params_142_grad": [3072, 768], "l_self_param_groups_0_params_143_grad": [3072], "l_self_param_groups_0_params_144_grad": [768, 3072], "l_self_param_groups_0_params_145_grad": [768], "l_self_param_groups_0_params_146_grad": [768], "l_self_param_groups_0_params_147_grad": [768], "l_self_state_list_l_self_state_keys_0_exp_avg_": [50304, 768], "l_self_state_list_l_self_state_keys_2_exp_avg_": [768], "l_self_state_list_l_self_state_keys_3_exp_avg_": [768], "l_self_state_list_l_self_state_keys_4_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_5_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_6_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_7_exp_avg_": [768], "l_self_state_list_l_self_state_keys_8_exp_avg_": [768], "l_self_state_list_l_self_state_keys_9_exp_avg_": [768], "l_self_state_list_l_self_state_keys_10_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_11_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_12_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_13_exp_avg_": [768], "l_self_state_list_l_self_state_keys_14_exp_avg_": [768], "l_self_state_list_l_self_state_keys_15_exp_avg_": [768], "l_self_state_list_l_self_state_keys_16_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_17_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_18_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_19_exp_avg_": [768], "l_self_state_list_l_self_state_keys_20_exp_avg_": [768], "l_self_state_list_l_self_state_keys_21_exp_avg_": [768], "l_self_state_list_l_self_state_keys_22_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_23_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_24_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_25_exp_avg_": [768], "l_self_state_list_l_self_state_keys_26_exp_avg_": [768], "l_self_state_list_l_self_state_keys_27_exp_avg_": [768], "l_self_state_list_l_self_state_keys_28_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_29_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_30_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_31_exp_avg_": [768], "l_self_state_list_l_self_state_keys_32_exp_avg_": [768], "l_self_state_list_l_self_state_keys_33_exp_avg_": [768], "l_self_state_list_l_self_state_keys_34_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_35_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_36_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_37_exp_avg_": [768], "l_self_state_list_l_self_state_keys_38_exp_avg_": [768], "l_self_state_list_l_self_state_keys_39_exp_avg_": [768], "l_self_state_list_l_self_state_keys_40_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_41_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_42_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_43_exp_avg_": [768], "l_self_state_list_l_self_state_keys_44_exp_avg_": [768], "l_self_state_list_l_self_state_keys_45_exp_avg_": [768], "l_self_state_list_l_self_state_keys_46_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_47_exp_avg_": [3072], 
"l_self_state_list_l_self_state_keys_48_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_49_exp_avg_": [768], "l_self_state_list_l_self_state_keys_50_exp_avg_": [768], "l_self_state_list_l_self_state_keys_51_exp_avg_": [768], "l_self_state_list_l_self_state_keys_52_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_53_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_54_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_55_exp_avg_": [768], "l_self_state_list_l_self_state_keys_56_exp_avg_": [768], "l_self_state_list_l_self_state_keys_57_exp_avg_": [768], "l_self_state_list_l_self_state_keys_58_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_59_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_60_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_61_exp_avg_": [768], "l_self_state_list_l_self_state_keys_62_exp_avg_": [768], "l_self_state_list_l_self_state_keys_63_exp_avg_": [768], "l_self_state_list_l_self_state_keys_64_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_65_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_66_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_67_exp_avg_": [768], "l_self_state_list_l_self_state_keys_68_exp_avg_": [768], "l_self_state_list_l_self_state_keys_69_exp_avg_": [768], "l_self_state_list_l_self_state_keys_70_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_71_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_72_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_73_exp_avg_": [768], "l_self_state_list_l_self_state_keys_74_exp_avg_": [768], "l_self_state_list_l_self_state_keys_75_exp_avg_": [768], "l_self_state_list_l_self_state_keys_76_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_77_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_78_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_79_exp_avg_": [768], "l_self_state_list_l_self_state_keys_80_exp_avg_": [768], "l_self_state_list_l_self_state_keys_81_exp_avg_": [768], "l_self_state_list_l_self_state_keys_82_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_83_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_84_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_85_exp_avg_": [768], "l_self_state_list_l_self_state_keys_86_exp_avg_": [768], "l_self_state_list_l_self_state_keys_87_exp_avg_": [768], "l_self_state_list_l_self_state_keys_88_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_89_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_90_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_91_exp_avg_": [768], "l_self_state_list_l_self_state_keys_92_exp_avg_": [768], "l_self_state_list_l_self_state_keys_93_exp_avg_": [768], "l_self_state_list_l_self_state_keys_94_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_95_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_96_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_97_exp_avg_": [768], "l_self_state_list_l_self_state_keys_98_exp_avg_": [768], "l_self_state_list_l_self_state_keys_99_exp_avg_": [768], "l_self_state_list_l_self_state_keys_100_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_101_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_102_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_103_exp_avg_": [768], "l_self_state_list_l_self_state_keys_104_exp_avg_": [768], "l_self_state_list_l_self_state_keys_105_exp_avg_": [768], "l_self_state_list_l_self_state_keys_106_exp_avg_": [3072, 768], 
"l_self_state_list_l_self_state_keys_107_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_108_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_109_exp_avg_": [768], "l_self_state_list_l_self_state_keys_110_exp_avg_": [768], "l_self_state_list_l_self_state_keys_111_exp_avg_": [768], "l_self_state_list_l_self_state_keys_112_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_113_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_114_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_115_exp_avg_": [768], "l_self_state_list_l_self_state_keys_116_exp_avg_": [768], "l_self_state_list_l_self_state_keys_117_exp_avg_": [768], "l_self_state_list_l_self_state_keys_118_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_119_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_120_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_121_exp_avg_": [768], "l_self_state_list_l_self_state_keys_122_exp_avg_": [768], "l_self_state_list_l_self_state_keys_123_exp_avg_": [768], "l_self_state_list_l_self_state_keys_124_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_125_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_126_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_127_exp_avg_": [768], "l_self_state_list_l_self_state_keys_128_exp_avg_": [768], "l_self_state_list_l_self_state_keys_129_exp_avg_": [768], "l_self_state_list_l_self_state_keys_130_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_131_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_132_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_133_exp_avg_": [768], "l_self_state_list_l_self_state_keys_134_exp_avg_": [768], "l_self_state_list_l_self_state_keys_135_exp_avg_": [768], "l_self_state_list_l_self_state_keys_136_exp_avg_": [2304, 768], "l_self_state_list_l_self_state_keys_137_exp_avg_": [2304], "l_self_state_list_l_self_state_keys_138_exp_avg_": [768, 768], "l_self_state_list_l_self_state_keys_139_exp_avg_": [768], "l_self_state_list_l_self_state_keys_140_exp_avg_": [768], "l_self_state_list_l_self_state_keys_141_exp_avg_": [768], "l_self_state_list_l_self_state_keys_142_exp_avg_": [3072, 768], "l_self_state_list_l_self_state_keys_143_exp_avg_": [3072], "l_self_state_list_l_self_state_keys_144_exp_avg_": [768, 3072], "l_self_state_list_l_self_state_keys_145_exp_avg_": [768], "l_self_state_list_l_self_state_keys_146_exp_avg_": [768], "l_self_state_list_l_self_state_keys_147_exp_avg_": [768], "l_self_state_list_l_self_state_keys_0_exp_avg_sq_": [50304, 768], "l_self_state_list_l_self_state_keys_2_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_3_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_4_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_5_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_6_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_7_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_8_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_9_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_10_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_11_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_12_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_13_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_14_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_15_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_16_exp_avg_sq_": [2304, 768], 
"l_self_state_list_l_self_state_keys_17_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_18_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_19_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_20_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_21_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_22_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_23_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_24_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_25_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_26_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_27_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_28_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_29_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_30_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_31_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_32_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_33_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_34_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_35_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_36_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_37_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_38_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_39_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_40_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_41_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_42_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_43_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_44_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_45_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_46_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_47_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_48_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_49_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_50_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_51_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_52_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_53_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_54_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_55_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_56_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_57_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_58_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_59_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_60_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_61_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_62_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_63_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_64_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_65_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_66_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_67_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_68_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_69_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_70_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_71_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_72_exp_avg_sq_": [768, 3072], 
"l_self_state_list_l_self_state_keys_73_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_74_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_75_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_76_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_77_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_78_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_79_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_80_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_81_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_82_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_83_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_84_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_85_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_86_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_87_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_88_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_89_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_90_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_91_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_92_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_93_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_94_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_95_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_96_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_97_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_98_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_99_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_100_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_101_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_102_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_103_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_104_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_105_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_106_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_107_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_108_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_109_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_110_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_111_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_112_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_113_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_114_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_115_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_116_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_117_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_118_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_119_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_120_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_121_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_122_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_123_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_124_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_125_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_126_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_127_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_128_exp_avg_sq_": 
[768], "l_self_state_list_l_self_state_keys_129_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_130_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_131_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_132_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_133_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_134_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_135_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_136_exp_avg_sq_": [2304, 768], "l_self_state_list_l_self_state_keys_137_exp_avg_sq_": [2304], "l_self_state_list_l_self_state_keys_138_exp_avg_sq_": [768, 768], "l_self_state_list_l_self_state_keys_139_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_140_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_141_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_142_exp_avg_sq_": [3072, 768], "l_self_state_list_l_self_state_keys_143_exp_avg_sq_": [3072], "l_self_state_list_l_self_state_keys_144_exp_avg_sq_": [768, 3072], "l_self_state_list_l_self_state_keys_145_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_146_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_147_exp_avg_sq_": [768], "l_self_state_list_l_self_state_keys_0_step_": [], "l_self_state_list_l_self_state_keys_2_step_": [], "l_self_state_list_l_self_state_keys_3_step_": [], "l_self_state_list_l_self_state_keys_4_step_": [], "l_self_state_list_l_self_state_keys_5_step_": [], "l_self_state_list_l_self_state_keys_6_step_": [], "l_self_state_list_l_self_state_keys_7_step_": [], "l_self_state_list_l_self_state_keys_8_step_": [], "l_self_state_list_l_self_state_keys_9_step_": [], "l_self_state_list_l_self_state_keys_10_step_": [], "l_self_state_list_l_self_state_keys_11_step_": [], "l_self_state_list_l_self_state_keys_12_step_": [], "l_self_state_list_l_self_state_keys_13_step_": [], "l_self_state_list_l_self_state_keys_14_step_": [], "l_self_state_list_l_self_state_keys_15_step_": [], "l_self_state_list_l_self_state_keys_16_step_": [], "l_self_state_list_l_self_state_keys_17_step_": [], "l_self_state_list_l_self_state_keys_18_step_": [], "l_self_state_list_l_self_state_keys_19_step_": [], "l_self_state_list_l_self_state_keys_20_step_": [], "l_self_state_list_l_self_state_keys_21_step_": [], "l_self_state_list_l_self_state_keys_22_step_": [], "l_self_state_list_l_self_state_keys_23_step_": [], "l_self_state_list_l_self_state_keys_24_step_": [], "l_self_state_list_l_self_state_keys_25_step_": [], "l_self_state_list_l_self_state_keys_26_step_": [], "l_self_state_list_l_self_state_keys_27_step_": [], "l_self_state_list_l_self_state_keys_28_step_": [], "l_self_state_list_l_self_state_keys_29_step_": [], "l_self_state_list_l_self_state_keys_30_step_": [], "l_self_state_list_l_self_state_keys_31_step_": [], "l_self_state_list_l_self_state_keys_32_step_": [], "l_self_state_list_l_self_state_keys_33_step_": [], "l_self_state_list_l_self_state_keys_34_step_": [], "l_self_state_list_l_self_state_keys_35_step_": [], "l_self_state_list_l_self_state_keys_36_step_": [], "l_self_state_list_l_self_state_keys_37_step_": [], "l_self_state_list_l_self_state_keys_38_step_": [], "l_self_state_list_l_self_state_keys_39_step_": [], "l_self_state_list_l_self_state_keys_40_step_": [], "l_self_state_list_l_self_state_keys_41_step_": [], "l_self_state_list_l_self_state_keys_42_step_": [], "l_self_state_list_l_self_state_keys_43_step_": [], "l_self_state_list_l_self_state_keys_44_step_": [], "l_self_state_list_l_self_state_keys_45_step_": [], 
"l_self_state_list_l_self_state_keys_46_step_": [], "l_self_state_list_l_self_state_keys_47_step_": [], "l_self_state_list_l_self_state_keys_48_step_": [], "l_self_state_list_l_self_state_keys_49_step_": [], "l_self_state_list_l_self_state_keys_50_step_": [], "l_self_state_list_l_self_state_keys_51_step_": [], "l_self_state_list_l_self_state_keys_52_step_": [], "l_self_state_list_l_self_state_keys_53_step_": [], "l_self_state_list_l_self_state_keys_54_step_": [], "l_self_state_list_l_self_state_keys_55_step_": [], "l_self_state_list_l_self_state_keys_56_step_": [], "l_self_state_list_l_self_state_keys_57_step_": [], "l_self_state_list_l_self_state_keys_58_step_": [], "l_self_state_list_l_self_state_keys_59_step_": [], "l_self_state_list_l_self_state_keys_60_step_": [], "l_self_state_list_l_self_state_keys_61_step_": [], "l_self_state_list_l_self_state_keys_62_step_": [], "l_self_state_list_l_self_state_keys_63_step_": [], "l_self_state_list_l_self_state_keys_64_step_": [], "l_self_state_list_l_self_state_keys_65_step_": [], "l_self_state_list_l_self_state_keys_66_step_": [], "l_self_state_list_l_self_state_keys_67_step_": [], "l_self_state_list_l_self_state_keys_68_step_": [], "l_self_state_list_l_self_state_keys_69_step_": [], "l_self_state_list_l_self_state_keys_70_step_": [], "l_self_state_list_l_self_state_keys_71_step_": [], "l_self_state_list_l_self_state_keys_72_step_": [], "l_self_state_list_l_self_state_keys_73_step_": [], "l_self_state_list_l_self_state_keys_74_step_": [], "l_self_state_list_l_self_state_keys_75_step_": [], "l_self_state_list_l_self_state_keys_76_step_": [], "l_self_state_list_l_self_state_keys_77_step_": [], "l_self_state_list_l_self_state_keys_78_step_": [], "l_self_state_list_l_self_state_keys_79_step_": [], "l_self_state_list_l_self_state_keys_80_step_": [], "l_self_state_list_l_self_state_keys_81_step_": [], "l_self_state_list_l_self_state_keys_82_step_": [], "l_self_state_list_l_self_state_keys_83_step_": [], "l_self_state_list_l_self_state_keys_84_step_": [], "l_self_state_list_l_self_state_keys_85_step_": [], "l_self_state_list_l_self_state_keys_86_step_": [], "l_self_state_list_l_self_state_keys_87_step_": [], "l_self_state_list_l_self_state_keys_88_step_": [], "l_self_state_list_l_self_state_keys_89_step_": [], "l_self_state_list_l_self_state_keys_90_step_": [], "l_self_state_list_l_self_state_keys_91_step_": [], "l_self_state_list_l_self_state_keys_92_step_": [], "l_self_state_list_l_self_state_keys_93_step_": [], "l_self_state_list_l_self_state_keys_94_step_": [], "l_self_state_list_l_self_state_keys_95_step_": [], "l_self_state_list_l_self_state_keys_96_step_": [], "l_self_state_list_l_self_state_keys_97_step_": [], "l_self_state_list_l_self_state_keys_98_step_": [], "l_self_state_list_l_self_state_keys_99_step_": [], "l_self_state_list_l_self_state_keys_100_step_": [], "l_self_state_list_l_self_state_keys_101_step_": [], "l_self_state_list_l_self_state_keys_102_step_": [], "l_self_state_list_l_self_state_keys_103_step_": [], "l_self_state_list_l_self_state_keys_104_step_": [], "l_self_state_list_l_self_state_keys_105_step_": [], "l_self_state_list_l_self_state_keys_106_step_": [], "l_self_state_list_l_self_state_keys_107_step_": [], "l_self_state_list_l_self_state_keys_108_step_": [], "l_self_state_list_l_self_state_keys_109_step_": [], "l_self_state_list_l_self_state_keys_110_step_": [], "l_self_state_list_l_self_state_keys_111_step_": [], "l_self_state_list_l_self_state_keys_112_step_": [], "l_self_state_list_l_self_state_keys_113_step_": [], 
"l_self_state_list_l_self_state_keys_114_step_": [], "l_self_state_list_l_self_state_keys_115_step_": [], "l_self_state_list_l_self_state_keys_116_step_": [], "l_self_state_list_l_self_state_keys_117_step_": [], "l_self_state_list_l_self_state_keys_118_step_": [], "l_self_state_list_l_self_state_keys_119_step_": [], "l_self_state_list_l_self_state_keys_120_step_": [], "l_self_state_list_l_self_state_keys_121_step_": [], "l_self_state_list_l_self_state_keys_122_step_": [], "l_self_state_list_l_self_state_keys_123_step_": [], "l_self_state_list_l_self_state_keys_124_step_": [], "l_self_state_list_l_self_state_keys_125_step_": [], "l_self_state_list_l_self_state_keys_126_step_": [], "l_self_state_list_l_self_state_keys_127_step_": [], "l_self_state_list_l_self_state_keys_128_step_": [], "l_self_state_list_l_self_state_keys_129_step_": [], "l_self_state_list_l_self_state_keys_130_step_": [], "l_self_state_list_l_self_state_keys_131_step_": [], "l_self_state_list_l_self_state_keys_132_step_": [], "l_self_state_list_l_self_state_keys_133_step_": [], "l_self_state_list_l_self_state_keys_134_step_": [], "l_self_state_list_l_self_state_keys_135_step_": [], "l_self_state_list_l_self_state_keys_136_step_": [], "l_self_state_list_l_self_state_keys_137_step_": [], "l_self_state_list_l_self_state_keys_138_step_": [], "l_self_state_list_l_self_state_keys_139_step_": [], "l_self_state_list_l_self_state_keys_140_step_": [], "l_self_state_list_l_self_state_keys_141_step_": [], "l_self_state_list_l_self_state_keys_142_step_": [], "l_self_state_list_l_self_state_keys_143_step_": [], "l_self_state_list_l_self_state_keys_144_step_": [], "l_self_state_list_l_self_state_keys_145_step_": [], "l_self_state_list_l_self_state_keys_146_step_": [], "l_self_state_list_l_self_state_keys_147_step_": [], "getitem_592": [], "getitem_593": [], "getitem_594": [], "getitem_595": [], "getitem_596": [], "getitem_597": [], "getitem_598": [], "getitem_599": [], "getitem_600": [], "getitem_601": [], "getitem_602": [], "getitem_603": [], "getitem_604": [], "getitem_605": [], "getitem_606": [], "getitem_607": [], "getitem_608": [], "getitem_609": [], "getitem_610": [], "getitem_611": [], "getitem_612": [], "getitem_613": [], "getitem_614": [], "getitem_615": [], "getitem_616": [], "getitem_617": [], "getitem_618": [], "getitem_619": [], "getitem_620": [], "getitem_621": [], "getitem_622": [], "getitem_623": [], "getitem_624": [], "getitem_625": [], "getitem_626": [], "getitem_627": [], "getitem_628": [], "getitem_629": [], "getitem_630": [], "getitem_631": [], "getitem_632": [], "getitem_633": [], "getitem_634": [], "getitem_635": [], "getitem_636": [], "getitem_637": [], "getitem_638": [], "getitem_639": [], "getitem_640": [], "getitem_641": [], "getitem_642": [], "getitem_643": [], "getitem_644": [], "getitem_645": [], "getitem_646": [], "getitem_647": [], "getitem_648": [], "getitem_649": [], "getitem_650": [], "getitem_651": [], "getitem_652": [], "getitem_653": [], "getitem_654": [], "getitem_655": [], "getitem_656": [], "getitem_657": [], "getitem_658": [], "getitem_659": [], "getitem_660": [], "getitem_661": [], "getitem_662": [], "getitem_663": [], "getitem_664": [], "getitem_665": [], "getitem_666": [], "getitem_667": [], "getitem_668": [], "getitem_669": [], "getitem_670": [], "getitem_671": [], "getitem_672": [], "getitem_673": [], "getitem_674": [], "getitem_675": [], "getitem_676": [], "getitem_677": [], "getitem_678": [], "getitem_679": [], "getitem_680": [], "getitem_681": [], "getitem_682": [], "getitem_683": [], 
"getitem_684": [], "getitem_685": [], "getitem_686": [], "getitem_687": [], "getitem_688": [], "getitem_689": [], "getitem_690": [], "getitem_691": [], "getitem_692": [], "getitem_693": [], "getitem_694": [], "getitem_695": [], "getitem_696": [], "getitem_697": [], "getitem_698": [], "getitem_699": [], "getitem_700": [], "getitem_701": [], "getitem_702": [], "getitem_703": [], "getitem_704": [], "getitem_705": [], "getitem_706": [], "getitem_707": [], "getitem_708": [], "getitem_709": [], "getitem_710": [], "getitem_711": [], "getitem_712": [], "getitem_713": [], "getitem_714": [], "getitem_715": [], "getitem_716": [], "getitem_717": [], "getitem_718": [], "getitem_719": [], "getitem_720": [], "getitem_721": [], "getitem_722": [], "getitem_723": [], "getitem_724": [], "getitem_725": [], "getitem_726": [], "getitem_727": [], "getitem_728": [], "getitem_729": [], "getitem_730": [], "getitem_731": [], "getitem_732": [], "getitem_733": [], "getitem_734": [], "getitem_735": [], "getitem_736": [], "getitem_737": [], "getitem_738": [], "getitem_739": [], "getitem_740": [], "getitem_741": [], "getitem_742": [], "getitem_743": [], "getitem_744": [], "getitem_745": [], "getitem_746": [], "getitem_747": [], "getitem_748": [], "getitem_749": [], "getitem_750": [], "getitem_751": [], "getitem_752": [], "getitem_753": [], "getitem_754": [], "getitem_755": [], "getitem_756": [], "getitem_757": [], "getitem_758": [], "getitem_759": [], "getitem_760": [], "getitem_761": [], "getitem_762": [], "getitem_763": [], "getitem_764": [], "getitem_765": [], "getitem_766": [], "getitem_767": [], "getitem_768": [], "getitem_769": [], "getitem_770": [], "getitem_771": [], "getitem_772": [], "getitem_773": [], "getitem_774": [], "getitem_775": [], "getitem_776": [], "getitem_777": [], "getitem_778": [], "getitem_779": [], "getitem_780": [], "getitem_781": [], "getitem_782": [], "getitem_783": [], "getitem_784": [], "getitem_785": [], "getitem_786": [], "getitem_787": [], "getitem_788": [], "getitem_789": [], "getitem_790": [], "getitem_791": [], "getitem_792": [], "getitem_793": [], "getitem_794": [], "getitem_795": [], "getitem_796": [], "getitem_797": [], "getitem_798": [], "getitem_799": [], "getitem_800": [], "getitem_801": [], "getitem_802": [], "getitem_803": [], "getitem_804": [], "getitem_805": [], "getitem_806": [], "getitem_807": [], "getitem_808": [], "getitem_809": [], "getitem_810": [], "getitem_811": [], "getitem_812": [], "getitem_813": [], "getitem_814": [], "getitem_815": [], "getitem_816": [], "getitem_817": [], "getitem_818": [], "getitem_819": [], "getitem_820": [], "getitem_821": [], "getitem_822": [], "getitem_823": [], "getitem_824": [], "getitem_825": [], "getitem_826": [], "getitem_827": [], "getitem_828": [], "getitem_829": [], "getitem_830": [], "getitem_831": [], "getitem_832": [], "getitem_833": [], "getitem_834": [], "getitem_835": [], "getitem_836": [], "getitem_837": [], "getitem_838": [], "getitem_839": [], "getitem_840": [], "getitem_841": [], "getitem_842": [], "getitem_843": [], "getitem_844": [], "getitem_845": [], "getitem_846": [], "getitem_847": [], "getitem_848": [], "getitem_849": [], "getitem_850": [], "getitem_851": [], "getitem_852": [], "getitem_853": [], "getitem_854": [], "getitem_855": [], "getitem_856": [], "getitem_857": [], "getitem_858": [], "getitem_859": [], "getitem_860": [], "getitem_861": [], "getitem_862": [], "getitem_863": [], "getitem_864": [], "getitem_865": [], "getitem_866": [], "getitem_867": [], "getitem_868": [], "getitem_869": [], "getitem_870": [], 
"getitem_871": [], "getitem_872": [], "getitem_873": [], "getitem_874": [], "getitem_875": [], "getitem_876": [], "getitem_877": [], "getitem_878": [], "getitem_879": [], "getitem_880": [], "getitem_881": [], "getitem_882": [], "getitem_883": [], "getitem_884": [], "getitem_885": [], "getitem_886": [], "getitem_887": [], "getitem_1776": [50304, 768], "getitem_1777": [1024, 768], "getitem_1778": [768], "getitem_1779": [768], "getitem_1780": [2304, 768], "getitem_1781": [2304], "getitem_1782": [768, 768], "getitem_1783": [768], "getitem_1784": [768], "getitem_1785": [768], "getitem_1786": [3072, 768], "getitem_1787": [3072], "getitem_1788": [768, 3072], "getitem_1789": [768], "getitem_1790": [768], "getitem_1791": [768], "getitem_1792": [2304, 768], "getitem_1793": [2304], "getitem_1794": [768, 768], "getitem_1795": [768], "getitem_1796": [768], "getitem_1797": [768], "getitem_1798": [3072, 768], "getitem_1799": [3072], "getitem_1800": [768, 3072], "getitem_1801": [768], "getitem_1802": [768], "getitem_1803": [768], "getitem_1804": [2304, 768], "getitem_1805": [2304], "getitem_1806": [768, 768], "getitem_1807": [768], "getitem_1808": [768], "getitem_1809": [768], "getitem_1810": [3072, 768], "getitem_1811": [3072], "getitem_1812": [768, 3072], "getitem_1813": [768], "getitem_1814": [768], "getitem_1815": [768], "getitem_1816": [2304, 768], "getitem_1817": [2304], "getitem_1818": [768, 768], "getitem_1819": [768], "getitem_1820": [768], "getitem_1821": [768], "getitem_1822": [3072, 768], "getitem_1823": [3072], "getitem_1824": [768, 3072], "getitem_1825": [768], "getitem_1826": [768], "getitem_1827": [768], "getitem_1828": [2304, 768], "getitem_1829": [2304], "getitem_1830": [768, 768], "getitem_1831": [768], "getitem_1832": [768], "getitem_1833": [768], "getitem_1834": [3072, 768], "getitem_1835": [3072], "getitem_1836": [768, 3072], "getitem_1837": [768], "getitem_1838": [768], "getitem_1839": [768], "getitem_1840": [2304, 768], "getitem_1841": [2304], "getitem_1842": [768, 768], "getitem_1843": [768], "getitem_1844": [768], "getitem_1845": [768], "getitem_1846": [3072, 768], "getitem_1847": [3072], "getitem_1848": [768, 3072], "getitem_1849": [768], "getitem_1850": [768], "getitem_1851": [768], "getitem_1852": [2304, 768], "getitem_1853": [2304], "getitem_1854": [768, 768], "getitem_1855": [768], "getitem_1856": [768], "getitem_1857": [768], "getitem_1858": [3072, 768], "getitem_1859": [3072], "getitem_1860": [768, 3072], "getitem_1861": [768], "getitem_1862": [768], "getitem_1863": [768], "getitem_1864": [2304, 768], "getitem_1865": [2304], "getitem_1866": [768, 768], "getitem_1867": [768], "getitem_1868": [768], "getitem_1869": [768], "getitem_1870": [3072, 768], "getitem_1871": [3072], "getitem_1872": [768, 3072], "getitem_1873": [768], "getitem_1874": [768], "getitem_1875": [768], "getitem_1876": [2304, 768], "getitem_1877": [2304], "getitem_1878": [768, 768], "getitem_1879": [768], "getitem_1880": [768], "getitem_1881": [768], "getitem_1882": [3072, 768], "getitem_1883": [3072], "getitem_1884": [768, 3072], "getitem_1885": [768], "getitem_1886": [768], "getitem_1887": [768], "getitem_1888": [2304, 768], "getitem_1889": [2304], "getitem_1890": [768, 768], "getitem_1891": [768], "getitem_1892": [768], "getitem_1893": [768], "getitem_1894": [3072, 768], "getitem_1895": [3072], "getitem_1896": [768, 3072], "getitem_1897": [768], "getitem_1898": [768], "getitem_1899": [768], "getitem_1900": [2304, 768], "getitem_1901": [2304], "getitem_1902": [768, 768], "getitem_1903": [768], 
"getitem_1904": [768], "getitem_1905": [768], "getitem_1906": [3072, 768], "getitem_1907": [3072], "getitem_1908": [768, 3072], "getitem_1909": [768], "getitem_1910": [768], "getitem_1911": [768], "getitem_1912": [2304, 768], "getitem_1913": [2304], "getitem_1914": [768, 768], "getitem_1915": [768], "getitem_1916": [768], "getitem_1917": [768], "getitem_1918": [3072, 768], "getitem_1919": [3072], "getitem_1920": [768, 3072], "getitem_1921": [768], "getitem_1922": [768], "getitem_1923": [768]}}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "81513b26edcd911a5160c972fdbca10f"} + class GraphModule(torch.nn.Module): + def forward(self, L_self_param_groups_0_params_0_: "f32[50304, 768][768, 1]cuda:0", L_self_param_groups_0_params_1_: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_2_: "f32[768][1]cuda:0", L_self_param_groups_0_params_3_: "f32[768][1]cuda:0", L_self_param_groups_0_params_4_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_5_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_6_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_7_: "f32[768][1]cuda:0", L_self_param_groups_0_params_8_: "f32[768][1]cuda:0", L_self_param_groups_0_params_9_: "f32[768][1]cuda:0", L_self_param_groups_0_params_10_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_11_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_12_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_13_: "f32[768][1]cuda:0", L_self_param_groups_0_params_14_: "f32[768][1]cuda:0", L_self_param_groups_0_params_15_: "f32[768][1]cuda:0", L_self_param_groups_0_params_16_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_17_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_18_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_19_: "f32[768][1]cuda:0", L_self_param_groups_0_params_20_: "f32[768][1]cuda:0", L_self_param_groups_0_params_21_: "f32[768][1]cuda:0", L_self_param_groups_0_params_22_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_23_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_24_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_25_: "f32[768][1]cuda:0", L_self_param_groups_0_params_26_: "f32[768][1]cuda:0", L_self_param_groups_0_params_27_: "f32[768][1]cuda:0", L_self_param_groups_0_params_28_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_29_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_30_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_31_: "f32[768][1]cuda:0", L_self_param_groups_0_params_32_: "f32[768][1]cuda:0", L_self_param_groups_0_params_33_: "f32[768][1]cuda:0", L_self_param_groups_0_params_34_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_35_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_36_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_37_: "f32[768][1]cuda:0", L_self_param_groups_0_params_38_: "f32[768][1]cuda:0", L_self_param_groups_0_params_39_: "f32[768][1]cuda:0", L_self_param_groups_0_params_40_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_41_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_42_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_43_: "f32[768][1]cuda:0", L_self_param_groups_0_params_44_: "f32[768][1]cuda:0", L_self_param_groups_0_params_45_: "f32[768][1]cuda:0", L_self_param_groups_0_params_46_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_47_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_48_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_param_groups_0_params_49_: "f32[768][1]cuda:0", L_self_param_groups_0_params_50_: "f32[768][1]cuda:0", L_self_param_groups_0_params_51_: "f32[768][1]cuda:0", L_self_param_groups_0_params_52_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_53_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_54_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_55_: "f32[768][1]cuda:0", L_self_param_groups_0_params_56_: "f32[768][1]cuda:0", L_self_param_groups_0_params_57_: "f32[768][1]cuda:0", L_self_param_groups_0_params_58_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_59_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_60_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_61_: "f32[768][1]cuda:0", L_self_param_groups_0_params_62_: "f32[768][1]cuda:0", L_self_param_groups_0_params_63_: "f32[768][1]cuda:0", L_self_param_groups_0_params_64_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_65_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_66_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_67_: "f32[768][1]cuda:0", L_self_param_groups_0_params_68_: "f32[768][1]cuda:0", L_self_param_groups_0_params_69_: "f32[768][1]cuda:0", L_self_param_groups_0_params_70_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_71_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_72_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_73_: "f32[768][1]cuda:0", L_self_param_groups_0_params_74_: "f32[768][1]cuda:0", L_self_param_groups_0_params_75_: "f32[768][1]cuda:0", L_self_param_groups_0_params_76_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_77_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_78_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_79_: "f32[768][1]cuda:0", L_self_param_groups_0_params_80_: "f32[768][1]cuda:0", L_self_param_groups_0_params_81_: "f32[768][1]cuda:0", L_self_param_groups_0_params_82_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_83_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_84_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_85_: "f32[768][1]cuda:0", L_self_param_groups_0_params_86_: "f32[768][1]cuda:0", L_self_param_groups_0_params_87_: "f32[768][1]cuda:0", L_self_param_groups_0_params_88_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_89_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_90_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_91_: "f32[768][1]cuda:0", L_self_param_groups_0_params_92_: "f32[768][1]cuda:0", L_self_param_groups_0_params_93_: "f32[768][1]cuda:0", L_self_param_groups_0_params_94_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_95_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_96_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_97_: "f32[768][1]cuda:0", L_self_param_groups_0_params_98_: "f32[768][1]cuda:0", L_self_param_groups_0_params_99_: "f32[768][1]cuda:0", L_self_param_groups_0_params_100_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_101_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_102_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_103_: "f32[768][1]cuda:0", L_self_param_groups_0_params_104_: "f32[768][1]cuda:0", L_self_param_groups_0_params_105_: "f32[768][1]cuda:0", L_self_param_groups_0_params_106_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_107_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_108_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_param_groups_0_params_109_: "f32[768][1]cuda:0", L_self_param_groups_0_params_110_: "f32[768][1]cuda:0", L_self_param_groups_0_params_111_: "f32[768][1]cuda:0", L_self_param_groups_0_params_112_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_113_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_114_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_115_: "f32[768][1]cuda:0", L_self_param_groups_0_params_116_: "f32[768][1]cuda:0", L_self_param_groups_0_params_117_: "f32[768][1]cuda:0", L_self_param_groups_0_params_118_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_119_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_120_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_121_: "f32[768][1]cuda:0", L_self_param_groups_0_params_122_: "f32[768][1]cuda:0", L_self_param_groups_0_params_123_: "f32[768][1]cuda:0", L_self_param_groups_0_params_124_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_125_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_126_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_127_: "f32[768][1]cuda:0", L_self_param_groups_0_params_128_: "f32[768][1]cuda:0", L_self_param_groups_0_params_129_: "f32[768][1]cuda:0", L_self_param_groups_0_params_130_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_131_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_132_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_133_: "f32[768][1]cuda:0", L_self_param_groups_0_params_134_: "f32[768][1]cuda:0", L_self_param_groups_0_params_135_: "f32[768][1]cuda:0", L_self_param_groups_0_params_136_: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_137_: "f32[2304][1]cuda:0", L_self_param_groups_0_params_138_: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_139_: "f32[768][1]cuda:0", L_self_param_groups_0_params_140_: "f32[768][1]cuda:0", L_self_param_groups_0_params_141_: "f32[768][1]cuda:0", L_self_param_groups_0_params_142_: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_143_: "f32[3072][1]cuda:0", L_self_param_groups_0_params_144_: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_145_: "f32[768][1]cuda:0", L_self_param_groups_0_params_146_: "f32[768][1]cuda:0", L_self_param_groups_0_params_147_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_1_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_1_exp_avg_: "f32[1024, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_1_exp_avg_sq_: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_0_grad: "f32[50304, 768][768, 1]cuda:0", L_self_param_groups_0_params_1_grad: "f32[1024, 768][768, 1]cuda:0", L_self_param_groups_0_params_2_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_3_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_4_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_5_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_6_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_7_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_8_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_9_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_10_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_11_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_12_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_13_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_14_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_15_grad: "f32[768][1]cuda:0", 
L_self_param_groups_0_params_16_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_17_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_18_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_19_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_20_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_21_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_22_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_23_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_24_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_25_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_26_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_27_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_28_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_29_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_30_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_31_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_32_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_33_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_34_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_35_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_36_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_37_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_38_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_39_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_40_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_41_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_42_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_43_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_44_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_45_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_46_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_47_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_48_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_49_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_50_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_51_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_52_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_53_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_54_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_55_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_56_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_57_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_58_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_59_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_60_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_61_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_62_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_63_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_64_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_65_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_66_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_67_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_68_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_69_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_70_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_71_grad: "f32[3072][1]cuda:0", 
L_self_param_groups_0_params_72_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_73_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_74_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_75_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_76_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_77_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_78_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_79_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_80_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_81_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_82_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_83_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_84_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_85_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_86_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_87_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_88_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_89_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_90_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_91_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_92_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_93_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_94_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_95_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_96_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_97_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_98_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_99_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_100_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_101_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_102_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_103_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_104_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_105_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_106_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_107_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_108_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_109_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_110_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_111_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_112_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_113_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_114_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_115_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_116_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_117_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_118_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_119_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_120_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_121_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_122_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_123_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_124_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_125_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_126_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_127_grad: "f32[768][1]cuda:0", 
L_self_param_groups_0_params_128_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_129_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_130_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_131_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_132_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_133_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_134_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_135_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_136_grad: "f32[2304, 768][768, 1]cuda:0", L_self_param_groups_0_params_137_grad: "f32[2304][1]cuda:0", L_self_param_groups_0_params_138_grad: "f32[768, 768][768, 1]cuda:0", L_self_param_groups_0_params_139_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_140_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_141_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_142_grad: "f32[3072, 768][768, 1]cuda:0", L_self_param_groups_0_params_143_grad: "f32[3072][1]cuda:0", L_self_param_groups_0_params_144_grad: "f32[768, 3072][3072, 1]cuda:0", L_self_param_groups_0_params_145_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_146_grad: "f32[768][1]cuda:0", L_self_param_groups_0_params_147_grad: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_exp_avg_: "f32[50304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_2_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_3_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_4_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_5_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_6_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_7_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_8_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_9_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_10_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_11_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_12_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_13_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_14_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_15_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_16_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_17_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_18_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_19_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_20_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_21_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_22_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_23_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_24_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_25_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_26_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_27_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_28_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_29_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_30_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_31_exp_avg_: 
"f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_32_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_33_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_34_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_35_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_36_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_37_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_38_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_39_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_40_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_41_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_42_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_43_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_44_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_45_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_46_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_47_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_48_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_49_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_50_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_51_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_52_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_53_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_54_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_55_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_56_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_57_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_58_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_59_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_60_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_61_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_62_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_63_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_64_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_65_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_66_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_67_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_68_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_69_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_70_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_71_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_72_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_73_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_74_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_75_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_76_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_77_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_78_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_79_exp_avg_: 
"f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_80_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_81_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_82_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_83_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_84_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_85_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_86_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_87_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_88_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_89_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_90_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_91_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_92_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_93_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_94_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_95_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_96_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_97_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_98_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_99_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_100_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_101_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_102_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_103_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_104_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_105_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_106_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_107_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_108_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_109_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_110_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_111_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_112_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_113_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_114_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_115_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_116_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_117_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_118_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_119_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_120_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_121_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_122_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_123_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_124_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_125_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_126_exp_avg_: "f32[768, 768][768, 1]cuda:0", 
L_self_state_list_L_self_state_keys_127_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_128_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_129_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_130_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_131_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_132_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_133_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_134_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_135_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_136_exp_avg_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_137_exp_avg_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_138_exp_avg_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_139_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_140_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_141_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_142_exp_avg_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_143_exp_avg_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_144_exp_avg_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_145_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_146_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_147_exp_avg_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_exp_avg_sq_: "f32[50304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_2_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_3_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_4_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_5_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_6_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_7_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_8_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_9_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_10_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_11_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_12_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_13_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_14_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_15_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_16_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_17_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_18_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_19_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_20_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_21_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_22_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_23_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_24_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_25_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_26_exp_avg_sq_: "f32[768][1]cuda:0", 
L_self_state_list_L_self_state_keys_27_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_28_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_29_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_30_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_31_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_32_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_33_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_34_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_35_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_36_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_37_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_38_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_39_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_40_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_41_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_42_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_43_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_44_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_45_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_46_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_47_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_48_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_49_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_50_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_51_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_52_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_53_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_54_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_55_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_56_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_57_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_58_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_59_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_60_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_61_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_62_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_63_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_64_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_65_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_66_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_67_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_68_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_69_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_70_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_71_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_72_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", 
L_self_state_list_L_self_state_keys_73_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_74_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_75_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_76_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_77_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_78_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_79_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_80_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_81_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_82_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_83_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_84_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_85_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_86_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_87_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_88_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_89_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_90_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_91_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_92_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_93_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_94_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_95_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_96_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_97_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_98_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_99_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_100_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_101_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_102_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_103_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_104_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_105_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_106_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_107_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_108_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_109_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_110_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_111_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_112_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_113_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_114_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_115_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_116_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_117_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_118_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", 
L_self_state_list_L_self_state_keys_119_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_120_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_121_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_122_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_123_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_124_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_125_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_126_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_127_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_128_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_129_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_130_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_131_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_132_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_133_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_134_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_135_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_136_exp_avg_sq_: "f32[2304, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_137_exp_avg_sq_: "f32[2304][1]cuda:0", L_self_state_list_L_self_state_keys_138_exp_avg_sq_: "f32[768, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_139_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_140_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_141_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_142_exp_avg_sq_: "f32[3072, 768][768, 1]cuda:0", L_self_state_list_L_self_state_keys_143_exp_avg_sq_: "f32[3072][1]cuda:0", L_self_state_list_L_self_state_keys_144_exp_avg_sq_: "f32[768, 3072][3072, 1]cuda:0", L_self_state_list_L_self_state_keys_145_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_146_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_147_exp_avg_sq_: "f32[768][1]cuda:0", L_self_state_list_L_self_state_keys_0_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_2_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_3_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_4_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_5_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_6_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_7_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_8_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_9_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_10_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_11_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_12_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_13_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_14_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_15_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_16_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_17_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_18_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_19_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_20_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_21_step_: 
"f32[][]cuda:0", L_self_state_list_L_self_state_keys_22_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_23_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_24_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_25_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_26_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_27_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_28_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_29_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_30_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_31_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_32_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_33_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_34_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_35_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_36_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_37_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_38_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_39_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_40_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_41_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_42_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_43_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_44_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_45_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_46_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_47_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_48_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_49_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_50_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_51_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_52_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_53_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_54_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_55_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_56_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_57_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_58_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_59_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_60_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_61_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_62_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_63_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_64_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_65_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_66_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_67_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_68_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_69_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_70_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_71_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_72_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_73_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_74_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_75_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_76_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_77_step_: "f32[][]cuda:0", 
L_self_state_list_L_self_state_keys_78_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_79_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_80_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_81_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_82_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_83_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_84_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_85_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_86_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_87_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_88_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_89_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_90_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_91_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_92_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_93_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_94_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_95_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_96_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_97_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_98_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_99_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_100_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_101_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_102_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_103_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_104_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_105_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_106_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_107_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_108_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_109_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_110_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_111_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_112_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_113_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_114_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_115_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_116_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_117_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_118_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_119_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_120_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_121_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_122_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_123_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_124_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_125_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_126_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_127_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_128_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_129_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_130_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_131_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_132_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_133_step_: 
"f32[][]cuda:0", L_self_state_list_L_self_state_keys_134_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_135_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_136_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_137_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_138_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_139_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_140_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_141_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_142_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_143_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_144_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_145_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_146_step_: "f32[][]cuda:0", L_self_state_list_L_self_state_keys_147_step_: "f32[][]cuda:0"): + l_self_param_groups_0_params_0_ = L_self_param_groups_0_params_0_ + l_self_param_groups_0_params_1_ = L_self_param_groups_0_params_1_ + l_self_param_groups_0_params_2_ = L_self_param_groups_0_params_2_ + l_self_param_groups_0_params_3_ = L_self_param_groups_0_params_3_ + l_self_param_groups_0_params_4_ = L_self_param_groups_0_params_4_ + l_self_param_groups_0_params_5_ = L_self_param_groups_0_params_5_ + l_self_param_groups_0_params_6_ = L_self_param_groups_0_params_6_ + l_self_param_groups_0_params_7_ = L_self_param_groups_0_params_7_ + l_self_param_groups_0_params_8_ = L_self_param_groups_0_params_8_ + l_self_param_groups_0_params_9_ = L_self_param_groups_0_params_9_ + l_self_param_groups_0_params_10_ = L_self_param_groups_0_params_10_ + l_self_param_groups_0_params_11_ = L_self_param_groups_0_params_11_ + l_self_param_groups_0_params_12_ = L_self_param_groups_0_params_12_ + l_self_param_groups_0_params_13_ = L_self_param_groups_0_params_13_ + l_self_param_groups_0_params_14_ = L_self_param_groups_0_params_14_ + l_self_param_groups_0_params_15_ = L_self_param_groups_0_params_15_ + l_self_param_groups_0_params_16_ = L_self_param_groups_0_params_16_ + l_self_param_groups_0_params_17_ = L_self_param_groups_0_params_17_ + l_self_param_groups_0_params_18_ = L_self_param_groups_0_params_18_ + l_self_param_groups_0_params_19_ = L_self_param_groups_0_params_19_ + l_self_param_groups_0_params_20_ = L_self_param_groups_0_params_20_ + l_self_param_groups_0_params_21_ = L_self_param_groups_0_params_21_ + l_self_param_groups_0_params_22_ = L_self_param_groups_0_params_22_ + l_self_param_groups_0_params_23_ = L_self_param_groups_0_params_23_ + l_self_param_groups_0_params_24_ = L_self_param_groups_0_params_24_ + l_self_param_groups_0_params_25_ = L_self_param_groups_0_params_25_ + l_self_param_groups_0_params_26_ = L_self_param_groups_0_params_26_ + l_self_param_groups_0_params_27_ = L_self_param_groups_0_params_27_ + l_self_param_groups_0_params_28_ = L_self_param_groups_0_params_28_ + l_self_param_groups_0_params_29_ = L_self_param_groups_0_params_29_ + l_self_param_groups_0_params_30_ = L_self_param_groups_0_params_30_ + l_self_param_groups_0_params_31_ = L_self_param_groups_0_params_31_ + l_self_param_groups_0_params_32_ = L_self_param_groups_0_params_32_ + l_self_param_groups_0_params_33_ = L_self_param_groups_0_params_33_ + l_self_param_groups_0_params_34_ = L_self_param_groups_0_params_34_ + l_self_param_groups_0_params_35_ = L_self_param_groups_0_params_35_ + l_self_param_groups_0_params_36_ = L_self_param_groups_0_params_36_ + l_self_param_groups_0_params_37_ = L_self_param_groups_0_params_37_ 
+ l_self_param_groups_0_params_38_ = L_self_param_groups_0_params_38_ + l_self_param_groups_0_params_39_ = L_self_param_groups_0_params_39_ + l_self_param_groups_0_params_40_ = L_self_param_groups_0_params_40_ + l_self_param_groups_0_params_41_ = L_self_param_groups_0_params_41_ + l_self_param_groups_0_params_42_ = L_self_param_groups_0_params_42_ + l_self_param_groups_0_params_43_ = L_self_param_groups_0_params_43_ + l_self_param_groups_0_params_44_ = L_self_param_groups_0_params_44_ + l_self_param_groups_0_params_45_ = L_self_param_groups_0_params_45_ + l_self_param_groups_0_params_46_ = L_self_param_groups_0_params_46_ + l_self_param_groups_0_params_47_ = L_self_param_groups_0_params_47_ + l_self_param_groups_0_params_48_ = L_self_param_groups_0_params_48_ + l_self_param_groups_0_params_49_ = L_self_param_groups_0_params_49_ + l_self_param_groups_0_params_50_ = L_self_param_groups_0_params_50_ + l_self_param_groups_0_params_51_ = L_self_param_groups_0_params_51_ + l_self_param_groups_0_params_52_ = L_self_param_groups_0_params_52_ + l_self_param_groups_0_params_53_ = L_self_param_groups_0_params_53_ + l_self_param_groups_0_params_54_ = L_self_param_groups_0_params_54_ + l_self_param_groups_0_params_55_ = L_self_param_groups_0_params_55_ + l_self_param_groups_0_params_56_ = L_self_param_groups_0_params_56_ + l_self_param_groups_0_params_57_ = L_self_param_groups_0_params_57_ + l_self_param_groups_0_params_58_ = L_self_param_groups_0_params_58_ + l_self_param_groups_0_params_59_ = L_self_param_groups_0_params_59_ + l_self_param_groups_0_params_60_ = L_self_param_groups_0_params_60_ + l_self_param_groups_0_params_61_ = L_self_param_groups_0_params_61_ + l_self_param_groups_0_params_62_ = L_self_param_groups_0_params_62_ + l_self_param_groups_0_params_63_ = L_self_param_groups_0_params_63_ + l_self_param_groups_0_params_64_ = L_self_param_groups_0_params_64_ + l_self_param_groups_0_params_65_ = L_self_param_groups_0_params_65_ + l_self_param_groups_0_params_66_ = L_self_param_groups_0_params_66_ + l_self_param_groups_0_params_67_ = L_self_param_groups_0_params_67_ + l_self_param_groups_0_params_68_ = L_self_param_groups_0_params_68_ + l_self_param_groups_0_params_69_ = L_self_param_groups_0_params_69_ + l_self_param_groups_0_params_70_ = L_self_param_groups_0_params_70_ + l_self_param_groups_0_params_71_ = L_self_param_groups_0_params_71_ + l_self_param_groups_0_params_72_ = L_self_param_groups_0_params_72_ + l_self_param_groups_0_params_73_ = L_self_param_groups_0_params_73_ + l_self_param_groups_0_params_74_ = L_self_param_groups_0_params_74_ + l_self_param_groups_0_params_75_ = L_self_param_groups_0_params_75_ + l_self_param_groups_0_params_76_ = L_self_param_groups_0_params_76_ + l_self_param_groups_0_params_77_ = L_self_param_groups_0_params_77_ + l_self_param_groups_0_params_78_ = L_self_param_groups_0_params_78_ + l_self_param_groups_0_params_79_ = L_self_param_groups_0_params_79_ + l_self_param_groups_0_params_80_ = L_self_param_groups_0_params_80_ + l_self_param_groups_0_params_81_ = L_self_param_groups_0_params_81_ + l_self_param_groups_0_params_82_ = L_self_param_groups_0_params_82_ + l_self_param_groups_0_params_83_ = L_self_param_groups_0_params_83_ + l_self_param_groups_0_params_84_ = L_self_param_groups_0_params_84_ + l_self_param_groups_0_params_85_ = L_self_param_groups_0_params_85_ + l_self_param_groups_0_params_86_ = L_self_param_groups_0_params_86_ + l_self_param_groups_0_params_87_ = L_self_param_groups_0_params_87_ + l_self_param_groups_0_params_88_ = 
L_self_param_groups_0_params_88_ + l_self_param_groups_0_params_89_ = L_self_param_groups_0_params_89_ + l_self_param_groups_0_params_90_ = L_self_param_groups_0_params_90_ + l_self_param_groups_0_params_91_ = L_self_param_groups_0_params_91_ + l_self_param_groups_0_params_92_ = L_self_param_groups_0_params_92_ + l_self_param_groups_0_params_93_ = L_self_param_groups_0_params_93_ + l_self_param_groups_0_params_94_ = L_self_param_groups_0_params_94_ + l_self_param_groups_0_params_95_ = L_self_param_groups_0_params_95_ + l_self_param_groups_0_params_96_ = L_self_param_groups_0_params_96_ + l_self_param_groups_0_params_97_ = L_self_param_groups_0_params_97_ + l_self_param_groups_0_params_98_ = L_self_param_groups_0_params_98_ + l_self_param_groups_0_params_99_ = L_self_param_groups_0_params_99_ + l_self_param_groups_0_params_100_ = L_self_param_groups_0_params_100_ + l_self_param_groups_0_params_101_ = L_self_param_groups_0_params_101_ + l_self_param_groups_0_params_102_ = L_self_param_groups_0_params_102_ + l_self_param_groups_0_params_103_ = L_self_param_groups_0_params_103_ + l_self_param_groups_0_params_104_ = L_self_param_groups_0_params_104_ + l_self_param_groups_0_params_105_ = L_self_param_groups_0_params_105_ + l_self_param_groups_0_params_106_ = L_self_param_groups_0_params_106_ + l_self_param_groups_0_params_107_ = L_self_param_groups_0_params_107_ + l_self_param_groups_0_params_108_ = L_self_param_groups_0_params_108_ + l_self_param_groups_0_params_109_ = L_self_param_groups_0_params_109_ + l_self_param_groups_0_params_110_ = L_self_param_groups_0_params_110_ + l_self_param_groups_0_params_111_ = L_self_param_groups_0_params_111_ + l_self_param_groups_0_params_112_ = L_self_param_groups_0_params_112_ + l_self_param_groups_0_params_113_ = L_self_param_groups_0_params_113_ + l_self_param_groups_0_params_114_ = L_self_param_groups_0_params_114_ + l_self_param_groups_0_params_115_ = L_self_param_groups_0_params_115_ + l_self_param_groups_0_params_116_ = L_self_param_groups_0_params_116_ + l_self_param_groups_0_params_117_ = L_self_param_groups_0_params_117_ + l_self_param_groups_0_params_118_ = L_self_param_groups_0_params_118_ + l_self_param_groups_0_params_119_ = L_self_param_groups_0_params_119_ + l_self_param_groups_0_params_120_ = L_self_param_groups_0_params_120_ + l_self_param_groups_0_params_121_ = L_self_param_groups_0_params_121_ + l_self_param_groups_0_params_122_ = L_self_param_groups_0_params_122_ + l_self_param_groups_0_params_123_ = L_self_param_groups_0_params_123_ + l_self_param_groups_0_params_124_ = L_self_param_groups_0_params_124_ + l_self_param_groups_0_params_125_ = L_self_param_groups_0_params_125_ + l_self_param_groups_0_params_126_ = L_self_param_groups_0_params_126_ + l_self_param_groups_0_params_127_ = L_self_param_groups_0_params_127_ + l_self_param_groups_0_params_128_ = L_self_param_groups_0_params_128_ + l_self_param_groups_0_params_129_ = L_self_param_groups_0_params_129_ + l_self_param_groups_0_params_130_ = L_self_param_groups_0_params_130_ + l_self_param_groups_0_params_131_ = L_self_param_groups_0_params_131_ + l_self_param_groups_0_params_132_ = L_self_param_groups_0_params_132_ + l_self_param_groups_0_params_133_ = L_self_param_groups_0_params_133_ + l_self_param_groups_0_params_134_ = L_self_param_groups_0_params_134_ + l_self_param_groups_0_params_135_ = L_self_param_groups_0_params_135_ + l_self_param_groups_0_params_136_ = L_self_param_groups_0_params_136_ + l_self_param_groups_0_params_137_ = L_self_param_groups_0_params_137_ + 
l_self_param_groups_0_params_138_ = L_self_param_groups_0_params_138_ + l_self_param_groups_0_params_139_ = L_self_param_groups_0_params_139_ + l_self_param_groups_0_params_140_ = L_self_param_groups_0_params_140_ + l_self_param_groups_0_params_141_ = L_self_param_groups_0_params_141_ + l_self_param_groups_0_params_142_ = L_self_param_groups_0_params_142_ + l_self_param_groups_0_params_143_ = L_self_param_groups_0_params_143_ + l_self_param_groups_0_params_144_ = L_self_param_groups_0_params_144_ + l_self_param_groups_0_params_145_ = L_self_param_groups_0_params_145_ + l_self_param_groups_0_params_146_ = L_self_param_groups_0_params_146_ + l_self_param_groups_0_params_147_ = L_self_param_groups_0_params_147_ + l_self_state_list_l_self_state_keys_1_step_ = L_self_state_list_L_self_state_keys_1_step_ + l_self_state_list_l_self_state_keys_1_exp_avg_ = L_self_state_list_L_self_state_keys_1_exp_avg_ + l_self_state_list_l_self_state_keys_1_exp_avg_sq_ = L_self_state_list_L_self_state_keys_1_exp_avg_sq_ + l_self_param_groups_0_params_0_grad = L_self_param_groups_0_params_0_grad + l_self_param_groups_0_params_1_grad = L_self_param_groups_0_params_1_grad + l_self_param_groups_0_params_2_grad = L_self_param_groups_0_params_2_grad + l_self_param_groups_0_params_3_grad = L_self_param_groups_0_params_3_grad + l_self_param_groups_0_params_4_grad = L_self_param_groups_0_params_4_grad + l_self_param_groups_0_params_5_grad = L_self_param_groups_0_params_5_grad + l_self_param_groups_0_params_6_grad = L_self_param_groups_0_params_6_grad + l_self_param_groups_0_params_7_grad = L_self_param_groups_0_params_7_grad + l_self_param_groups_0_params_8_grad = L_self_param_groups_0_params_8_grad + l_self_param_groups_0_params_9_grad = L_self_param_groups_0_params_9_grad + l_self_param_groups_0_params_10_grad = L_self_param_groups_0_params_10_grad + l_self_param_groups_0_params_11_grad = L_self_param_groups_0_params_11_grad + l_self_param_groups_0_params_12_grad = L_self_param_groups_0_params_12_grad + l_self_param_groups_0_params_13_grad = L_self_param_groups_0_params_13_grad + l_self_param_groups_0_params_14_grad = L_self_param_groups_0_params_14_grad + l_self_param_groups_0_params_15_grad = L_self_param_groups_0_params_15_grad + l_self_param_groups_0_params_16_grad = L_self_param_groups_0_params_16_grad + l_self_param_groups_0_params_17_grad = L_self_param_groups_0_params_17_grad + l_self_param_groups_0_params_18_grad = L_self_param_groups_0_params_18_grad + l_self_param_groups_0_params_19_grad = L_self_param_groups_0_params_19_grad + l_self_param_groups_0_params_20_grad = L_self_param_groups_0_params_20_grad + l_self_param_groups_0_params_21_grad = L_self_param_groups_0_params_21_grad + l_self_param_groups_0_params_22_grad = L_self_param_groups_0_params_22_grad + l_self_param_groups_0_params_23_grad = L_self_param_groups_0_params_23_grad + l_self_param_groups_0_params_24_grad = L_self_param_groups_0_params_24_grad + l_self_param_groups_0_params_25_grad = L_self_param_groups_0_params_25_grad + l_self_param_groups_0_params_26_grad = L_self_param_groups_0_params_26_grad + l_self_param_groups_0_params_27_grad = L_self_param_groups_0_params_27_grad + l_self_param_groups_0_params_28_grad = L_self_param_groups_0_params_28_grad + l_self_param_groups_0_params_29_grad = L_self_param_groups_0_params_29_grad + l_self_param_groups_0_params_30_grad = L_self_param_groups_0_params_30_grad + l_self_param_groups_0_params_31_grad = L_self_param_groups_0_params_31_grad + l_self_param_groups_0_params_32_grad = 
L_self_param_groups_0_params_32_grad + l_self_param_groups_0_params_33_grad = L_self_param_groups_0_params_33_grad + l_self_param_groups_0_params_34_grad = L_self_param_groups_0_params_34_grad + l_self_param_groups_0_params_35_grad = L_self_param_groups_0_params_35_grad + l_self_param_groups_0_params_36_grad = L_self_param_groups_0_params_36_grad + l_self_param_groups_0_params_37_grad = L_self_param_groups_0_params_37_grad + l_self_param_groups_0_params_38_grad = L_self_param_groups_0_params_38_grad + l_self_param_groups_0_params_39_grad = L_self_param_groups_0_params_39_grad + l_self_param_groups_0_params_40_grad = L_self_param_groups_0_params_40_grad + l_self_param_groups_0_params_41_grad = L_self_param_groups_0_params_41_grad + l_self_param_groups_0_params_42_grad = L_self_param_groups_0_params_42_grad + l_self_param_groups_0_params_43_grad = L_self_param_groups_0_params_43_grad + l_self_param_groups_0_params_44_grad = L_self_param_groups_0_params_44_grad + l_self_param_groups_0_params_45_grad = L_self_param_groups_0_params_45_grad + l_self_param_groups_0_params_46_grad = L_self_param_groups_0_params_46_grad + l_self_param_groups_0_params_47_grad = L_self_param_groups_0_params_47_grad + l_self_param_groups_0_params_48_grad = L_self_param_groups_0_params_48_grad + l_self_param_groups_0_params_49_grad = L_self_param_groups_0_params_49_grad + l_self_param_groups_0_params_50_grad = L_self_param_groups_0_params_50_grad + l_self_param_groups_0_params_51_grad = L_self_param_groups_0_params_51_grad + l_self_param_groups_0_params_52_grad = L_self_param_groups_0_params_52_grad + l_self_param_groups_0_params_53_grad = L_self_param_groups_0_params_53_grad + l_self_param_groups_0_params_54_grad = L_self_param_groups_0_params_54_grad + l_self_param_groups_0_params_55_grad = L_self_param_groups_0_params_55_grad + l_self_param_groups_0_params_56_grad = L_self_param_groups_0_params_56_grad + l_self_param_groups_0_params_57_grad = L_self_param_groups_0_params_57_grad + l_self_param_groups_0_params_58_grad = L_self_param_groups_0_params_58_grad + l_self_param_groups_0_params_59_grad = L_self_param_groups_0_params_59_grad + l_self_param_groups_0_params_60_grad = L_self_param_groups_0_params_60_grad + l_self_param_groups_0_params_61_grad = L_self_param_groups_0_params_61_grad + l_self_param_groups_0_params_62_grad = L_self_param_groups_0_params_62_grad + l_self_param_groups_0_params_63_grad = L_self_param_groups_0_params_63_grad + l_self_param_groups_0_params_64_grad = L_self_param_groups_0_params_64_grad + l_self_param_groups_0_params_65_grad = L_self_param_groups_0_params_65_grad + l_self_param_groups_0_params_66_grad = L_self_param_groups_0_params_66_grad + l_self_param_groups_0_params_67_grad = L_self_param_groups_0_params_67_grad + l_self_param_groups_0_params_68_grad = L_self_param_groups_0_params_68_grad + l_self_param_groups_0_params_69_grad = L_self_param_groups_0_params_69_grad + l_self_param_groups_0_params_70_grad = L_self_param_groups_0_params_70_grad + l_self_param_groups_0_params_71_grad = L_self_param_groups_0_params_71_grad + l_self_param_groups_0_params_72_grad = L_self_param_groups_0_params_72_grad + l_self_param_groups_0_params_73_grad = L_self_param_groups_0_params_73_grad + l_self_param_groups_0_params_74_grad = L_self_param_groups_0_params_74_grad + l_self_param_groups_0_params_75_grad = L_self_param_groups_0_params_75_grad + l_self_param_groups_0_params_76_grad = L_self_param_groups_0_params_76_grad + l_self_param_groups_0_params_77_grad = L_self_param_groups_0_params_77_grad + 
l_self_param_groups_0_params_78_grad = L_self_param_groups_0_params_78_grad + l_self_param_groups_0_params_79_grad = L_self_param_groups_0_params_79_grad + l_self_param_groups_0_params_80_grad = L_self_param_groups_0_params_80_grad + l_self_param_groups_0_params_81_grad = L_self_param_groups_0_params_81_grad + l_self_param_groups_0_params_82_grad = L_self_param_groups_0_params_82_grad + l_self_param_groups_0_params_83_grad = L_self_param_groups_0_params_83_grad + l_self_param_groups_0_params_84_grad = L_self_param_groups_0_params_84_grad + l_self_param_groups_0_params_85_grad = L_self_param_groups_0_params_85_grad + l_self_param_groups_0_params_86_grad = L_self_param_groups_0_params_86_grad + l_self_param_groups_0_params_87_grad = L_self_param_groups_0_params_87_grad + l_self_param_groups_0_params_88_grad = L_self_param_groups_0_params_88_grad + l_self_param_groups_0_params_89_grad = L_self_param_groups_0_params_89_grad + l_self_param_groups_0_params_90_grad = L_self_param_groups_0_params_90_grad + l_self_param_groups_0_params_91_grad = L_self_param_groups_0_params_91_grad + l_self_param_groups_0_params_92_grad = L_self_param_groups_0_params_92_grad + l_self_param_groups_0_params_93_grad = L_self_param_groups_0_params_93_grad + l_self_param_groups_0_params_94_grad = L_self_param_groups_0_params_94_grad + l_self_param_groups_0_params_95_grad = L_self_param_groups_0_params_95_grad + l_self_param_groups_0_params_96_grad = L_self_param_groups_0_params_96_grad + l_self_param_groups_0_params_97_grad = L_self_param_groups_0_params_97_grad + l_self_param_groups_0_params_98_grad = L_self_param_groups_0_params_98_grad + l_self_param_groups_0_params_99_grad = L_self_param_groups_0_params_99_grad + l_self_param_groups_0_params_100_grad = L_self_param_groups_0_params_100_grad + l_self_param_groups_0_params_101_grad = L_self_param_groups_0_params_101_grad + l_self_param_groups_0_params_102_grad = L_self_param_groups_0_params_102_grad + l_self_param_groups_0_params_103_grad = L_self_param_groups_0_params_103_grad + l_self_param_groups_0_params_104_grad = L_self_param_groups_0_params_104_grad + l_self_param_groups_0_params_105_grad = L_self_param_groups_0_params_105_grad + l_self_param_groups_0_params_106_grad = L_self_param_groups_0_params_106_grad + l_self_param_groups_0_params_107_grad = L_self_param_groups_0_params_107_grad + l_self_param_groups_0_params_108_grad = L_self_param_groups_0_params_108_grad + l_self_param_groups_0_params_109_grad = L_self_param_groups_0_params_109_grad + l_self_param_groups_0_params_110_grad = L_self_param_groups_0_params_110_grad + l_self_param_groups_0_params_111_grad = L_self_param_groups_0_params_111_grad + l_self_param_groups_0_params_112_grad = L_self_param_groups_0_params_112_grad + l_self_param_groups_0_params_113_grad = L_self_param_groups_0_params_113_grad + l_self_param_groups_0_params_114_grad = L_self_param_groups_0_params_114_grad + l_self_param_groups_0_params_115_grad = L_self_param_groups_0_params_115_grad + l_self_param_groups_0_params_116_grad = L_self_param_groups_0_params_116_grad + l_self_param_groups_0_params_117_grad = L_self_param_groups_0_params_117_grad + l_self_param_groups_0_params_118_grad = L_self_param_groups_0_params_118_grad + l_self_param_groups_0_params_119_grad = L_self_param_groups_0_params_119_grad + l_self_param_groups_0_params_120_grad = L_self_param_groups_0_params_120_grad + l_self_param_groups_0_params_121_grad = L_self_param_groups_0_params_121_grad + l_self_param_groups_0_params_122_grad = L_self_param_groups_0_params_122_grad 
+ l_self_param_groups_0_params_123_grad = L_self_param_groups_0_params_123_grad + l_self_param_groups_0_params_124_grad = L_self_param_groups_0_params_124_grad + l_self_param_groups_0_params_125_grad = L_self_param_groups_0_params_125_grad + l_self_param_groups_0_params_126_grad = L_self_param_groups_0_params_126_grad + l_self_param_groups_0_params_127_grad = L_self_param_groups_0_params_127_grad + l_self_param_groups_0_params_128_grad = L_self_param_groups_0_params_128_grad + l_self_param_groups_0_params_129_grad = L_self_param_groups_0_params_129_grad + l_self_param_groups_0_params_130_grad = L_self_param_groups_0_params_130_grad + l_self_param_groups_0_params_131_grad = L_self_param_groups_0_params_131_grad + l_self_param_groups_0_params_132_grad = L_self_param_groups_0_params_132_grad + l_self_param_groups_0_params_133_grad = L_self_param_groups_0_params_133_grad + l_self_param_groups_0_params_134_grad = L_self_param_groups_0_params_134_grad + l_self_param_groups_0_params_135_grad = L_self_param_groups_0_params_135_grad + l_self_param_groups_0_params_136_grad = L_self_param_groups_0_params_136_grad + l_self_param_groups_0_params_137_grad = L_self_param_groups_0_params_137_grad + l_self_param_groups_0_params_138_grad = L_self_param_groups_0_params_138_grad + l_self_param_groups_0_params_139_grad = L_self_param_groups_0_params_139_grad + l_self_param_groups_0_params_140_grad = L_self_param_groups_0_params_140_grad + l_self_param_groups_0_params_141_grad = L_self_param_groups_0_params_141_grad + l_self_param_groups_0_params_142_grad = L_self_param_groups_0_params_142_grad + l_self_param_groups_0_params_143_grad = L_self_param_groups_0_params_143_grad + l_self_param_groups_0_params_144_grad = L_self_param_groups_0_params_144_grad + l_self_param_groups_0_params_145_grad = L_self_param_groups_0_params_145_grad + l_self_param_groups_0_params_146_grad = L_self_param_groups_0_params_146_grad + l_self_param_groups_0_params_147_grad = L_self_param_groups_0_params_147_grad + l_self_state_list_l_self_state_keys_0_exp_avg_ = L_self_state_list_L_self_state_keys_0_exp_avg_ + l_self_state_list_l_self_state_keys_2_exp_avg_ = L_self_state_list_L_self_state_keys_2_exp_avg_ + l_self_state_list_l_self_state_keys_3_exp_avg_ = L_self_state_list_L_self_state_keys_3_exp_avg_ + l_self_state_list_l_self_state_keys_4_exp_avg_ = L_self_state_list_L_self_state_keys_4_exp_avg_ + l_self_state_list_l_self_state_keys_5_exp_avg_ = L_self_state_list_L_self_state_keys_5_exp_avg_ + l_self_state_list_l_self_state_keys_6_exp_avg_ = L_self_state_list_L_self_state_keys_6_exp_avg_ + l_self_state_list_l_self_state_keys_7_exp_avg_ = L_self_state_list_L_self_state_keys_7_exp_avg_ + l_self_state_list_l_self_state_keys_8_exp_avg_ = L_self_state_list_L_self_state_keys_8_exp_avg_ + l_self_state_list_l_self_state_keys_9_exp_avg_ = L_self_state_list_L_self_state_keys_9_exp_avg_ + l_self_state_list_l_self_state_keys_10_exp_avg_ = L_self_state_list_L_self_state_keys_10_exp_avg_ + l_self_state_list_l_self_state_keys_11_exp_avg_ = L_self_state_list_L_self_state_keys_11_exp_avg_ + l_self_state_list_l_self_state_keys_12_exp_avg_ = L_self_state_list_L_self_state_keys_12_exp_avg_ + l_self_state_list_l_self_state_keys_13_exp_avg_ = L_self_state_list_L_self_state_keys_13_exp_avg_ + l_self_state_list_l_self_state_keys_14_exp_avg_ = L_self_state_list_L_self_state_keys_14_exp_avg_ + l_self_state_list_l_self_state_keys_15_exp_avg_ = L_self_state_list_L_self_state_keys_15_exp_avg_ + l_self_state_list_l_self_state_keys_16_exp_avg_ = 
L_self_state_list_L_self_state_keys_16_exp_avg_ + l_self_state_list_l_self_state_keys_17_exp_avg_ = L_self_state_list_L_self_state_keys_17_exp_avg_ + l_self_state_list_l_self_state_keys_18_exp_avg_ = L_self_state_list_L_self_state_keys_18_exp_avg_ + l_self_state_list_l_self_state_keys_19_exp_avg_ = L_self_state_list_L_self_state_keys_19_exp_avg_ + l_self_state_list_l_self_state_keys_20_exp_avg_ = L_self_state_list_L_self_state_keys_20_exp_avg_ + l_self_state_list_l_self_state_keys_21_exp_avg_ = L_self_state_list_L_self_state_keys_21_exp_avg_ + l_self_state_list_l_self_state_keys_22_exp_avg_ = L_self_state_list_L_self_state_keys_22_exp_avg_ + l_self_state_list_l_self_state_keys_23_exp_avg_ = L_self_state_list_L_self_state_keys_23_exp_avg_ + l_self_state_list_l_self_state_keys_24_exp_avg_ = L_self_state_list_L_self_state_keys_24_exp_avg_ + l_self_state_list_l_self_state_keys_25_exp_avg_ = L_self_state_list_L_self_state_keys_25_exp_avg_ + l_self_state_list_l_self_state_keys_26_exp_avg_ = L_self_state_list_L_self_state_keys_26_exp_avg_ + l_self_state_list_l_self_state_keys_27_exp_avg_ = L_self_state_list_L_self_state_keys_27_exp_avg_ + l_self_state_list_l_self_state_keys_28_exp_avg_ = L_self_state_list_L_self_state_keys_28_exp_avg_ + l_self_state_list_l_self_state_keys_29_exp_avg_ = L_self_state_list_L_self_state_keys_29_exp_avg_ + l_self_state_list_l_self_state_keys_30_exp_avg_ = L_self_state_list_L_self_state_keys_30_exp_avg_ + l_self_state_list_l_self_state_keys_31_exp_avg_ = L_self_state_list_L_self_state_keys_31_exp_avg_ + l_self_state_list_l_self_state_keys_32_exp_avg_ = L_self_state_list_L_self_state_keys_32_exp_avg_ + l_self_state_list_l_self_state_keys_33_exp_avg_ = L_self_state_list_L_self_state_keys_33_exp_avg_ + l_self_state_list_l_self_state_keys_34_exp_avg_ = L_self_state_list_L_self_state_keys_34_exp_avg_ + l_self_state_list_l_self_state_keys_35_exp_avg_ = L_self_state_list_L_self_state_keys_35_exp_avg_ + l_self_state_list_l_self_state_keys_36_exp_avg_ = L_self_state_list_L_self_state_keys_36_exp_avg_ + l_self_state_list_l_self_state_keys_37_exp_avg_ = L_self_state_list_L_self_state_keys_37_exp_avg_ + l_self_state_list_l_self_state_keys_38_exp_avg_ = L_self_state_list_L_self_state_keys_38_exp_avg_ + l_self_state_list_l_self_state_keys_39_exp_avg_ = L_self_state_list_L_self_state_keys_39_exp_avg_ + l_self_state_list_l_self_state_keys_40_exp_avg_ = L_self_state_list_L_self_state_keys_40_exp_avg_ + l_self_state_list_l_self_state_keys_41_exp_avg_ = L_self_state_list_L_self_state_keys_41_exp_avg_ + l_self_state_list_l_self_state_keys_42_exp_avg_ = L_self_state_list_L_self_state_keys_42_exp_avg_ + l_self_state_list_l_self_state_keys_43_exp_avg_ = L_self_state_list_L_self_state_keys_43_exp_avg_ + l_self_state_list_l_self_state_keys_44_exp_avg_ = L_self_state_list_L_self_state_keys_44_exp_avg_ + l_self_state_list_l_self_state_keys_45_exp_avg_ = L_self_state_list_L_self_state_keys_45_exp_avg_ + l_self_state_list_l_self_state_keys_46_exp_avg_ = L_self_state_list_L_self_state_keys_46_exp_avg_ + l_self_state_list_l_self_state_keys_47_exp_avg_ = L_self_state_list_L_self_state_keys_47_exp_avg_ + l_self_state_list_l_self_state_keys_48_exp_avg_ = L_self_state_list_L_self_state_keys_48_exp_avg_ + l_self_state_list_l_self_state_keys_49_exp_avg_ = L_self_state_list_L_self_state_keys_49_exp_avg_ + l_self_state_list_l_self_state_keys_50_exp_avg_ = L_self_state_list_L_self_state_keys_50_exp_avg_ + l_self_state_list_l_self_state_keys_51_exp_avg_ = L_self_state_list_L_self_state_keys_51_exp_avg_ + 
l_self_state_list_l_self_state_keys_52_exp_avg_ = L_self_state_list_L_self_state_keys_52_exp_avg_ + l_self_state_list_l_self_state_keys_53_exp_avg_ = L_self_state_list_L_self_state_keys_53_exp_avg_ + l_self_state_list_l_self_state_keys_54_exp_avg_ = L_self_state_list_L_self_state_keys_54_exp_avg_ + l_self_state_list_l_self_state_keys_55_exp_avg_ = L_self_state_list_L_self_state_keys_55_exp_avg_ + l_self_state_list_l_self_state_keys_56_exp_avg_ = L_self_state_list_L_self_state_keys_56_exp_avg_ + l_self_state_list_l_self_state_keys_57_exp_avg_ = L_self_state_list_L_self_state_keys_57_exp_avg_ + l_self_state_list_l_self_state_keys_58_exp_avg_ = L_self_state_list_L_self_state_keys_58_exp_avg_ + l_self_state_list_l_self_state_keys_59_exp_avg_ = L_self_state_list_L_self_state_keys_59_exp_avg_ + l_self_state_list_l_self_state_keys_60_exp_avg_ = L_self_state_list_L_self_state_keys_60_exp_avg_ + l_self_state_list_l_self_state_keys_61_exp_avg_ = L_self_state_list_L_self_state_keys_61_exp_avg_ + l_self_state_list_l_self_state_keys_62_exp_avg_ = L_self_state_list_L_self_state_keys_62_exp_avg_ + l_self_state_list_l_self_state_keys_63_exp_avg_ = L_self_state_list_L_self_state_keys_63_exp_avg_ + l_self_state_list_l_self_state_keys_64_exp_avg_ = L_self_state_list_L_self_state_keys_64_exp_avg_ + l_self_state_list_l_self_state_keys_65_exp_avg_ = L_self_state_list_L_self_state_keys_65_exp_avg_ + l_self_state_list_l_self_state_keys_66_exp_avg_ = L_self_state_list_L_self_state_keys_66_exp_avg_ + l_self_state_list_l_self_state_keys_67_exp_avg_ = L_self_state_list_L_self_state_keys_67_exp_avg_ + l_self_state_list_l_self_state_keys_68_exp_avg_ = L_self_state_list_L_self_state_keys_68_exp_avg_ + l_self_state_list_l_self_state_keys_69_exp_avg_ = L_self_state_list_L_self_state_keys_69_exp_avg_ + l_self_state_list_l_self_state_keys_70_exp_avg_ = L_self_state_list_L_self_state_keys_70_exp_avg_ + l_self_state_list_l_self_state_keys_71_exp_avg_ = L_self_state_list_L_self_state_keys_71_exp_avg_ + l_self_state_list_l_self_state_keys_72_exp_avg_ = L_self_state_list_L_self_state_keys_72_exp_avg_ + l_self_state_list_l_self_state_keys_73_exp_avg_ = L_self_state_list_L_self_state_keys_73_exp_avg_ + l_self_state_list_l_self_state_keys_74_exp_avg_ = L_self_state_list_L_self_state_keys_74_exp_avg_ + l_self_state_list_l_self_state_keys_75_exp_avg_ = L_self_state_list_L_self_state_keys_75_exp_avg_ + l_self_state_list_l_self_state_keys_76_exp_avg_ = L_self_state_list_L_self_state_keys_76_exp_avg_ + l_self_state_list_l_self_state_keys_77_exp_avg_ = L_self_state_list_L_self_state_keys_77_exp_avg_ + l_self_state_list_l_self_state_keys_78_exp_avg_ = L_self_state_list_L_self_state_keys_78_exp_avg_ + l_self_state_list_l_self_state_keys_79_exp_avg_ = L_self_state_list_L_self_state_keys_79_exp_avg_ + l_self_state_list_l_self_state_keys_80_exp_avg_ = L_self_state_list_L_self_state_keys_80_exp_avg_ + l_self_state_list_l_self_state_keys_81_exp_avg_ = L_self_state_list_L_self_state_keys_81_exp_avg_ + l_self_state_list_l_self_state_keys_82_exp_avg_ = L_self_state_list_L_self_state_keys_82_exp_avg_ + l_self_state_list_l_self_state_keys_83_exp_avg_ = L_self_state_list_L_self_state_keys_83_exp_avg_ + l_self_state_list_l_self_state_keys_84_exp_avg_ = L_self_state_list_L_self_state_keys_84_exp_avg_ + l_self_state_list_l_self_state_keys_85_exp_avg_ = L_self_state_list_L_self_state_keys_85_exp_avg_ + l_self_state_list_l_self_state_keys_86_exp_avg_ = L_self_state_list_L_self_state_keys_86_exp_avg_ + l_self_state_list_l_self_state_keys_87_exp_avg_ = 
L_self_state_list_L_self_state_keys_87_exp_avg_ + l_self_state_list_l_self_state_keys_88_exp_avg_ = L_self_state_list_L_self_state_keys_88_exp_avg_ + l_self_state_list_l_self_state_keys_89_exp_avg_ = L_self_state_list_L_self_state_keys_89_exp_avg_ + l_self_state_list_l_self_state_keys_90_exp_avg_ = L_self_state_list_L_self_state_keys_90_exp_avg_ + l_self_state_list_l_self_state_keys_91_exp_avg_ = L_self_state_list_L_self_state_keys_91_exp_avg_ + l_self_state_list_l_self_state_keys_92_exp_avg_ = L_self_state_list_L_self_state_keys_92_exp_avg_ + l_self_state_list_l_self_state_keys_93_exp_avg_ = L_self_state_list_L_self_state_keys_93_exp_avg_ + l_self_state_list_l_self_state_keys_94_exp_avg_ = L_self_state_list_L_self_state_keys_94_exp_avg_ + l_self_state_list_l_self_state_keys_95_exp_avg_ = L_self_state_list_L_self_state_keys_95_exp_avg_ + l_self_state_list_l_self_state_keys_96_exp_avg_ = L_self_state_list_L_self_state_keys_96_exp_avg_ + l_self_state_list_l_self_state_keys_97_exp_avg_ = L_self_state_list_L_self_state_keys_97_exp_avg_ + l_self_state_list_l_self_state_keys_98_exp_avg_ = L_self_state_list_L_self_state_keys_98_exp_avg_ + l_self_state_list_l_self_state_keys_99_exp_avg_ = L_self_state_list_L_self_state_keys_99_exp_avg_ + l_self_state_list_l_self_state_keys_100_exp_avg_ = L_self_state_list_L_self_state_keys_100_exp_avg_ + l_self_state_list_l_self_state_keys_101_exp_avg_ = L_self_state_list_L_self_state_keys_101_exp_avg_ + l_self_state_list_l_self_state_keys_102_exp_avg_ = L_self_state_list_L_self_state_keys_102_exp_avg_ + l_self_state_list_l_self_state_keys_103_exp_avg_ = L_self_state_list_L_self_state_keys_103_exp_avg_ + l_self_state_list_l_self_state_keys_104_exp_avg_ = L_self_state_list_L_self_state_keys_104_exp_avg_ + l_self_state_list_l_self_state_keys_105_exp_avg_ = L_self_state_list_L_self_state_keys_105_exp_avg_ + l_self_state_list_l_self_state_keys_106_exp_avg_ = L_self_state_list_L_self_state_keys_106_exp_avg_ + l_self_state_list_l_self_state_keys_107_exp_avg_ = L_self_state_list_L_self_state_keys_107_exp_avg_ + l_self_state_list_l_self_state_keys_108_exp_avg_ = L_self_state_list_L_self_state_keys_108_exp_avg_ + l_self_state_list_l_self_state_keys_109_exp_avg_ = L_self_state_list_L_self_state_keys_109_exp_avg_ + l_self_state_list_l_self_state_keys_110_exp_avg_ = L_self_state_list_L_self_state_keys_110_exp_avg_ + l_self_state_list_l_self_state_keys_111_exp_avg_ = L_self_state_list_L_self_state_keys_111_exp_avg_ + l_self_state_list_l_self_state_keys_112_exp_avg_ = L_self_state_list_L_self_state_keys_112_exp_avg_ + l_self_state_list_l_self_state_keys_113_exp_avg_ = L_self_state_list_L_self_state_keys_113_exp_avg_ + l_self_state_list_l_self_state_keys_114_exp_avg_ = L_self_state_list_L_self_state_keys_114_exp_avg_ + l_self_state_list_l_self_state_keys_115_exp_avg_ = L_self_state_list_L_self_state_keys_115_exp_avg_ + l_self_state_list_l_self_state_keys_116_exp_avg_ = L_self_state_list_L_self_state_keys_116_exp_avg_ + l_self_state_list_l_self_state_keys_117_exp_avg_ = L_self_state_list_L_self_state_keys_117_exp_avg_ + l_self_state_list_l_self_state_keys_118_exp_avg_ = L_self_state_list_L_self_state_keys_118_exp_avg_ + l_self_state_list_l_self_state_keys_119_exp_avg_ = L_self_state_list_L_self_state_keys_119_exp_avg_ + l_self_state_list_l_self_state_keys_120_exp_avg_ = L_self_state_list_L_self_state_keys_120_exp_avg_ + l_self_state_list_l_self_state_keys_121_exp_avg_ = L_self_state_list_L_self_state_keys_121_exp_avg_ + l_self_state_list_l_self_state_keys_122_exp_avg_ = 
L_self_state_list_L_self_state_keys_122_exp_avg_ + l_self_state_list_l_self_state_keys_123_exp_avg_ = L_self_state_list_L_self_state_keys_123_exp_avg_ + l_self_state_list_l_self_state_keys_124_exp_avg_ = L_self_state_list_L_self_state_keys_124_exp_avg_ + l_self_state_list_l_self_state_keys_125_exp_avg_ = L_self_state_list_L_self_state_keys_125_exp_avg_ + l_self_state_list_l_self_state_keys_126_exp_avg_ = L_self_state_list_L_self_state_keys_126_exp_avg_ + l_self_state_list_l_self_state_keys_127_exp_avg_ = L_self_state_list_L_self_state_keys_127_exp_avg_ + l_self_state_list_l_self_state_keys_128_exp_avg_ = L_self_state_list_L_self_state_keys_128_exp_avg_ + l_self_state_list_l_self_state_keys_129_exp_avg_ = L_self_state_list_L_self_state_keys_129_exp_avg_ + l_self_state_list_l_self_state_keys_130_exp_avg_ = L_self_state_list_L_self_state_keys_130_exp_avg_ + l_self_state_list_l_self_state_keys_131_exp_avg_ = L_self_state_list_L_self_state_keys_131_exp_avg_ + l_self_state_list_l_self_state_keys_132_exp_avg_ = L_self_state_list_L_self_state_keys_132_exp_avg_ + l_self_state_list_l_self_state_keys_133_exp_avg_ = L_self_state_list_L_self_state_keys_133_exp_avg_ + l_self_state_list_l_self_state_keys_134_exp_avg_ = L_self_state_list_L_self_state_keys_134_exp_avg_ + l_self_state_list_l_self_state_keys_135_exp_avg_ = L_self_state_list_L_self_state_keys_135_exp_avg_ + l_self_state_list_l_self_state_keys_136_exp_avg_ = L_self_state_list_L_self_state_keys_136_exp_avg_ + l_self_state_list_l_self_state_keys_137_exp_avg_ = L_self_state_list_L_self_state_keys_137_exp_avg_ + l_self_state_list_l_self_state_keys_138_exp_avg_ = L_self_state_list_L_self_state_keys_138_exp_avg_ + l_self_state_list_l_self_state_keys_139_exp_avg_ = L_self_state_list_L_self_state_keys_139_exp_avg_ + l_self_state_list_l_self_state_keys_140_exp_avg_ = L_self_state_list_L_self_state_keys_140_exp_avg_ + l_self_state_list_l_self_state_keys_141_exp_avg_ = L_self_state_list_L_self_state_keys_141_exp_avg_ + l_self_state_list_l_self_state_keys_142_exp_avg_ = L_self_state_list_L_self_state_keys_142_exp_avg_ + l_self_state_list_l_self_state_keys_143_exp_avg_ = L_self_state_list_L_self_state_keys_143_exp_avg_ + l_self_state_list_l_self_state_keys_144_exp_avg_ = L_self_state_list_L_self_state_keys_144_exp_avg_ + l_self_state_list_l_self_state_keys_145_exp_avg_ = L_self_state_list_L_self_state_keys_145_exp_avg_ + l_self_state_list_l_self_state_keys_146_exp_avg_ = L_self_state_list_L_self_state_keys_146_exp_avg_ + l_self_state_list_l_self_state_keys_147_exp_avg_ = L_self_state_list_L_self_state_keys_147_exp_avg_ + l_self_state_list_l_self_state_keys_0_exp_avg_sq_ = L_self_state_list_L_self_state_keys_0_exp_avg_sq_ + l_self_state_list_l_self_state_keys_2_exp_avg_sq_ = L_self_state_list_L_self_state_keys_2_exp_avg_sq_ + l_self_state_list_l_self_state_keys_3_exp_avg_sq_ = L_self_state_list_L_self_state_keys_3_exp_avg_sq_ + l_self_state_list_l_self_state_keys_4_exp_avg_sq_ = L_self_state_list_L_self_state_keys_4_exp_avg_sq_ + l_self_state_list_l_self_state_keys_5_exp_avg_sq_ = L_self_state_list_L_self_state_keys_5_exp_avg_sq_ + l_self_state_list_l_self_state_keys_6_exp_avg_sq_ = L_self_state_list_L_self_state_keys_6_exp_avg_sq_ + l_self_state_list_l_self_state_keys_7_exp_avg_sq_ = L_self_state_list_L_self_state_keys_7_exp_avg_sq_ + l_self_state_list_l_self_state_keys_8_exp_avg_sq_ = L_self_state_list_L_self_state_keys_8_exp_avg_sq_ + l_self_state_list_l_self_state_keys_9_exp_avg_sq_ = L_self_state_list_L_self_state_keys_9_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_10_exp_avg_sq_ = L_self_state_list_L_self_state_keys_10_exp_avg_sq_ + l_self_state_list_l_self_state_keys_11_exp_avg_sq_ = L_self_state_list_L_self_state_keys_11_exp_avg_sq_ + l_self_state_list_l_self_state_keys_12_exp_avg_sq_ = L_self_state_list_L_self_state_keys_12_exp_avg_sq_ + l_self_state_list_l_self_state_keys_13_exp_avg_sq_ = L_self_state_list_L_self_state_keys_13_exp_avg_sq_ + l_self_state_list_l_self_state_keys_14_exp_avg_sq_ = L_self_state_list_L_self_state_keys_14_exp_avg_sq_ + l_self_state_list_l_self_state_keys_15_exp_avg_sq_ = L_self_state_list_L_self_state_keys_15_exp_avg_sq_ + l_self_state_list_l_self_state_keys_16_exp_avg_sq_ = L_self_state_list_L_self_state_keys_16_exp_avg_sq_ + l_self_state_list_l_self_state_keys_17_exp_avg_sq_ = L_self_state_list_L_self_state_keys_17_exp_avg_sq_ + l_self_state_list_l_self_state_keys_18_exp_avg_sq_ = L_self_state_list_L_self_state_keys_18_exp_avg_sq_ + l_self_state_list_l_self_state_keys_19_exp_avg_sq_ = L_self_state_list_L_self_state_keys_19_exp_avg_sq_ + l_self_state_list_l_self_state_keys_20_exp_avg_sq_ = L_self_state_list_L_self_state_keys_20_exp_avg_sq_ + l_self_state_list_l_self_state_keys_21_exp_avg_sq_ = L_self_state_list_L_self_state_keys_21_exp_avg_sq_ + l_self_state_list_l_self_state_keys_22_exp_avg_sq_ = L_self_state_list_L_self_state_keys_22_exp_avg_sq_ + l_self_state_list_l_self_state_keys_23_exp_avg_sq_ = L_self_state_list_L_self_state_keys_23_exp_avg_sq_ + l_self_state_list_l_self_state_keys_24_exp_avg_sq_ = L_self_state_list_L_self_state_keys_24_exp_avg_sq_ + l_self_state_list_l_self_state_keys_25_exp_avg_sq_ = L_self_state_list_L_self_state_keys_25_exp_avg_sq_ + l_self_state_list_l_self_state_keys_26_exp_avg_sq_ = L_self_state_list_L_self_state_keys_26_exp_avg_sq_ + l_self_state_list_l_self_state_keys_27_exp_avg_sq_ = L_self_state_list_L_self_state_keys_27_exp_avg_sq_ + l_self_state_list_l_self_state_keys_28_exp_avg_sq_ = L_self_state_list_L_self_state_keys_28_exp_avg_sq_ + l_self_state_list_l_self_state_keys_29_exp_avg_sq_ = L_self_state_list_L_self_state_keys_29_exp_avg_sq_ + l_self_state_list_l_self_state_keys_30_exp_avg_sq_ = L_self_state_list_L_self_state_keys_30_exp_avg_sq_ + l_self_state_list_l_self_state_keys_31_exp_avg_sq_ = L_self_state_list_L_self_state_keys_31_exp_avg_sq_ + l_self_state_list_l_self_state_keys_32_exp_avg_sq_ = L_self_state_list_L_self_state_keys_32_exp_avg_sq_ + l_self_state_list_l_self_state_keys_33_exp_avg_sq_ = L_self_state_list_L_self_state_keys_33_exp_avg_sq_ + l_self_state_list_l_self_state_keys_34_exp_avg_sq_ = L_self_state_list_L_self_state_keys_34_exp_avg_sq_ + l_self_state_list_l_self_state_keys_35_exp_avg_sq_ = L_self_state_list_L_self_state_keys_35_exp_avg_sq_ + l_self_state_list_l_self_state_keys_36_exp_avg_sq_ = L_self_state_list_L_self_state_keys_36_exp_avg_sq_ + l_self_state_list_l_self_state_keys_37_exp_avg_sq_ = L_self_state_list_L_self_state_keys_37_exp_avg_sq_ + l_self_state_list_l_self_state_keys_38_exp_avg_sq_ = L_self_state_list_L_self_state_keys_38_exp_avg_sq_ + l_self_state_list_l_self_state_keys_39_exp_avg_sq_ = L_self_state_list_L_self_state_keys_39_exp_avg_sq_ + l_self_state_list_l_self_state_keys_40_exp_avg_sq_ = L_self_state_list_L_self_state_keys_40_exp_avg_sq_ + l_self_state_list_l_self_state_keys_41_exp_avg_sq_ = L_self_state_list_L_self_state_keys_41_exp_avg_sq_ + l_self_state_list_l_self_state_keys_42_exp_avg_sq_ = L_self_state_list_L_self_state_keys_42_exp_avg_sq_ + l_self_state_list_l_self_state_keys_43_exp_avg_sq_ = 
L_self_state_list_L_self_state_keys_43_exp_avg_sq_ + l_self_state_list_l_self_state_keys_44_exp_avg_sq_ = L_self_state_list_L_self_state_keys_44_exp_avg_sq_ + l_self_state_list_l_self_state_keys_45_exp_avg_sq_ = L_self_state_list_L_self_state_keys_45_exp_avg_sq_ + l_self_state_list_l_self_state_keys_46_exp_avg_sq_ = L_self_state_list_L_self_state_keys_46_exp_avg_sq_ + l_self_state_list_l_self_state_keys_47_exp_avg_sq_ = L_self_state_list_L_self_state_keys_47_exp_avg_sq_ + l_self_state_list_l_self_state_keys_48_exp_avg_sq_ = L_self_state_list_L_self_state_keys_48_exp_avg_sq_ + l_self_state_list_l_self_state_keys_49_exp_avg_sq_ = L_self_state_list_L_self_state_keys_49_exp_avg_sq_ + l_self_state_list_l_self_state_keys_50_exp_avg_sq_ = L_self_state_list_L_self_state_keys_50_exp_avg_sq_ + l_self_state_list_l_self_state_keys_51_exp_avg_sq_ = L_self_state_list_L_self_state_keys_51_exp_avg_sq_ + l_self_state_list_l_self_state_keys_52_exp_avg_sq_ = L_self_state_list_L_self_state_keys_52_exp_avg_sq_ + l_self_state_list_l_self_state_keys_53_exp_avg_sq_ = L_self_state_list_L_self_state_keys_53_exp_avg_sq_ + l_self_state_list_l_self_state_keys_54_exp_avg_sq_ = L_self_state_list_L_self_state_keys_54_exp_avg_sq_ + l_self_state_list_l_self_state_keys_55_exp_avg_sq_ = L_self_state_list_L_self_state_keys_55_exp_avg_sq_ + l_self_state_list_l_self_state_keys_56_exp_avg_sq_ = L_self_state_list_L_self_state_keys_56_exp_avg_sq_ + l_self_state_list_l_self_state_keys_57_exp_avg_sq_ = L_self_state_list_L_self_state_keys_57_exp_avg_sq_ + l_self_state_list_l_self_state_keys_58_exp_avg_sq_ = L_self_state_list_L_self_state_keys_58_exp_avg_sq_ + l_self_state_list_l_self_state_keys_59_exp_avg_sq_ = L_self_state_list_L_self_state_keys_59_exp_avg_sq_ + l_self_state_list_l_self_state_keys_60_exp_avg_sq_ = L_self_state_list_L_self_state_keys_60_exp_avg_sq_ + l_self_state_list_l_self_state_keys_61_exp_avg_sq_ = L_self_state_list_L_self_state_keys_61_exp_avg_sq_ + l_self_state_list_l_self_state_keys_62_exp_avg_sq_ = L_self_state_list_L_self_state_keys_62_exp_avg_sq_ + l_self_state_list_l_self_state_keys_63_exp_avg_sq_ = L_self_state_list_L_self_state_keys_63_exp_avg_sq_ + l_self_state_list_l_self_state_keys_64_exp_avg_sq_ = L_self_state_list_L_self_state_keys_64_exp_avg_sq_ + l_self_state_list_l_self_state_keys_65_exp_avg_sq_ = L_self_state_list_L_self_state_keys_65_exp_avg_sq_ + l_self_state_list_l_self_state_keys_66_exp_avg_sq_ = L_self_state_list_L_self_state_keys_66_exp_avg_sq_ + l_self_state_list_l_self_state_keys_67_exp_avg_sq_ = L_self_state_list_L_self_state_keys_67_exp_avg_sq_ + l_self_state_list_l_self_state_keys_68_exp_avg_sq_ = L_self_state_list_L_self_state_keys_68_exp_avg_sq_ + l_self_state_list_l_self_state_keys_69_exp_avg_sq_ = L_self_state_list_L_self_state_keys_69_exp_avg_sq_ + l_self_state_list_l_self_state_keys_70_exp_avg_sq_ = L_self_state_list_L_self_state_keys_70_exp_avg_sq_ + l_self_state_list_l_self_state_keys_71_exp_avg_sq_ = L_self_state_list_L_self_state_keys_71_exp_avg_sq_ + l_self_state_list_l_self_state_keys_72_exp_avg_sq_ = L_self_state_list_L_self_state_keys_72_exp_avg_sq_ + l_self_state_list_l_self_state_keys_73_exp_avg_sq_ = L_self_state_list_L_self_state_keys_73_exp_avg_sq_ + l_self_state_list_l_self_state_keys_74_exp_avg_sq_ = L_self_state_list_L_self_state_keys_74_exp_avg_sq_ + l_self_state_list_l_self_state_keys_75_exp_avg_sq_ = L_self_state_list_L_self_state_keys_75_exp_avg_sq_ + l_self_state_list_l_self_state_keys_76_exp_avg_sq_ = L_self_state_list_L_self_state_keys_76_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_77_exp_avg_sq_ = L_self_state_list_L_self_state_keys_77_exp_avg_sq_ + l_self_state_list_l_self_state_keys_78_exp_avg_sq_ = L_self_state_list_L_self_state_keys_78_exp_avg_sq_ + l_self_state_list_l_self_state_keys_79_exp_avg_sq_ = L_self_state_list_L_self_state_keys_79_exp_avg_sq_ + l_self_state_list_l_self_state_keys_80_exp_avg_sq_ = L_self_state_list_L_self_state_keys_80_exp_avg_sq_ + l_self_state_list_l_self_state_keys_81_exp_avg_sq_ = L_self_state_list_L_self_state_keys_81_exp_avg_sq_ + l_self_state_list_l_self_state_keys_82_exp_avg_sq_ = L_self_state_list_L_self_state_keys_82_exp_avg_sq_ + l_self_state_list_l_self_state_keys_83_exp_avg_sq_ = L_self_state_list_L_self_state_keys_83_exp_avg_sq_ + l_self_state_list_l_self_state_keys_84_exp_avg_sq_ = L_self_state_list_L_self_state_keys_84_exp_avg_sq_ + l_self_state_list_l_self_state_keys_85_exp_avg_sq_ = L_self_state_list_L_self_state_keys_85_exp_avg_sq_ + l_self_state_list_l_self_state_keys_86_exp_avg_sq_ = L_self_state_list_L_self_state_keys_86_exp_avg_sq_ + l_self_state_list_l_self_state_keys_87_exp_avg_sq_ = L_self_state_list_L_self_state_keys_87_exp_avg_sq_ + l_self_state_list_l_self_state_keys_88_exp_avg_sq_ = L_self_state_list_L_self_state_keys_88_exp_avg_sq_ + l_self_state_list_l_self_state_keys_89_exp_avg_sq_ = L_self_state_list_L_self_state_keys_89_exp_avg_sq_ + l_self_state_list_l_self_state_keys_90_exp_avg_sq_ = L_self_state_list_L_self_state_keys_90_exp_avg_sq_ + l_self_state_list_l_self_state_keys_91_exp_avg_sq_ = L_self_state_list_L_self_state_keys_91_exp_avg_sq_ + l_self_state_list_l_self_state_keys_92_exp_avg_sq_ = L_self_state_list_L_self_state_keys_92_exp_avg_sq_ + l_self_state_list_l_self_state_keys_93_exp_avg_sq_ = L_self_state_list_L_self_state_keys_93_exp_avg_sq_ + l_self_state_list_l_self_state_keys_94_exp_avg_sq_ = L_self_state_list_L_self_state_keys_94_exp_avg_sq_ + l_self_state_list_l_self_state_keys_95_exp_avg_sq_ = L_self_state_list_L_self_state_keys_95_exp_avg_sq_ + l_self_state_list_l_self_state_keys_96_exp_avg_sq_ = L_self_state_list_L_self_state_keys_96_exp_avg_sq_ + l_self_state_list_l_self_state_keys_97_exp_avg_sq_ = L_self_state_list_L_self_state_keys_97_exp_avg_sq_ + l_self_state_list_l_self_state_keys_98_exp_avg_sq_ = L_self_state_list_L_self_state_keys_98_exp_avg_sq_ + l_self_state_list_l_self_state_keys_99_exp_avg_sq_ = L_self_state_list_L_self_state_keys_99_exp_avg_sq_ + l_self_state_list_l_self_state_keys_100_exp_avg_sq_ = L_self_state_list_L_self_state_keys_100_exp_avg_sq_ + l_self_state_list_l_self_state_keys_101_exp_avg_sq_ = L_self_state_list_L_self_state_keys_101_exp_avg_sq_ + l_self_state_list_l_self_state_keys_102_exp_avg_sq_ = L_self_state_list_L_self_state_keys_102_exp_avg_sq_ + l_self_state_list_l_self_state_keys_103_exp_avg_sq_ = L_self_state_list_L_self_state_keys_103_exp_avg_sq_ + l_self_state_list_l_self_state_keys_104_exp_avg_sq_ = L_self_state_list_L_self_state_keys_104_exp_avg_sq_ + l_self_state_list_l_self_state_keys_105_exp_avg_sq_ = L_self_state_list_L_self_state_keys_105_exp_avg_sq_ + l_self_state_list_l_self_state_keys_106_exp_avg_sq_ = L_self_state_list_L_self_state_keys_106_exp_avg_sq_ + l_self_state_list_l_self_state_keys_107_exp_avg_sq_ = L_self_state_list_L_self_state_keys_107_exp_avg_sq_ + l_self_state_list_l_self_state_keys_108_exp_avg_sq_ = L_self_state_list_L_self_state_keys_108_exp_avg_sq_ + l_self_state_list_l_self_state_keys_109_exp_avg_sq_ = L_self_state_list_L_self_state_keys_109_exp_avg_sq_ + 
l_self_state_list_l_self_state_keys_110_exp_avg_sq_ = L_self_state_list_L_self_state_keys_110_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_111_exp_avg_sq_ = L_self_state_list_L_self_state_keys_111_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_112_exp_avg_sq_ = L_self_state_list_L_self_state_keys_112_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_113_exp_avg_sq_ = L_self_state_list_L_self_state_keys_113_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_114_exp_avg_sq_ = L_self_state_list_L_self_state_keys_114_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_115_exp_avg_sq_ = L_self_state_list_L_self_state_keys_115_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_116_exp_avg_sq_ = L_self_state_list_L_self_state_keys_116_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_117_exp_avg_sq_ = L_self_state_list_L_self_state_keys_117_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_118_exp_avg_sq_ = L_self_state_list_L_self_state_keys_118_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_119_exp_avg_sq_ = L_self_state_list_L_self_state_keys_119_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_120_exp_avg_sq_ = L_self_state_list_L_self_state_keys_120_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_121_exp_avg_sq_ = L_self_state_list_L_self_state_keys_121_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_122_exp_avg_sq_ = L_self_state_list_L_self_state_keys_122_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_123_exp_avg_sq_ = L_self_state_list_L_self_state_keys_123_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_124_exp_avg_sq_ = L_self_state_list_L_self_state_keys_124_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_125_exp_avg_sq_ = L_self_state_list_L_self_state_keys_125_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_126_exp_avg_sq_ = L_self_state_list_L_self_state_keys_126_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_127_exp_avg_sq_ = L_self_state_list_L_self_state_keys_127_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_128_exp_avg_sq_ = L_self_state_list_L_self_state_keys_128_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_129_exp_avg_sq_ = L_self_state_list_L_self_state_keys_129_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_130_exp_avg_sq_ = L_self_state_list_L_self_state_keys_130_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_131_exp_avg_sq_ = L_self_state_list_L_self_state_keys_131_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_132_exp_avg_sq_ = L_self_state_list_L_self_state_keys_132_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_133_exp_avg_sq_ = L_self_state_list_L_self_state_keys_133_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_134_exp_avg_sq_ = L_self_state_list_L_self_state_keys_134_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_135_exp_avg_sq_ = L_self_state_list_L_self_state_keys_135_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_136_exp_avg_sq_ = L_self_state_list_L_self_state_keys_136_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_137_exp_avg_sq_ = L_self_state_list_L_self_state_keys_137_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_138_exp_avg_sq_ = L_self_state_list_L_self_state_keys_138_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_139_exp_avg_sq_ = L_self_state_list_L_self_state_keys_139_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_140_exp_avg_sq_ = L_self_state_list_L_self_state_keys_140_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_141_exp_avg_sq_ = L_self_state_list_L_self_state_keys_141_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_142_exp_avg_sq_ = L_self_state_list_L_self_state_keys_142_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_143_exp_avg_sq_ = L_self_state_list_L_self_state_keys_143_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_144_exp_avg_sq_ = L_self_state_list_L_self_state_keys_144_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_145_exp_avg_sq_ = L_self_state_list_L_self_state_keys_145_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_146_exp_avg_sq_ = L_self_state_list_L_self_state_keys_146_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_147_exp_avg_sq_ = L_self_state_list_L_self_state_keys_147_exp_avg_sq_
+ l_self_state_list_l_self_state_keys_0_step_ = L_self_state_list_L_self_state_keys_0_step_
+ l_self_state_list_l_self_state_keys_2_step_ = L_self_state_list_L_self_state_keys_2_step_
+ l_self_state_list_l_self_state_keys_3_step_ = L_self_state_list_L_self_state_keys_3_step_
+ l_self_state_list_l_self_state_keys_4_step_ = L_self_state_list_L_self_state_keys_4_step_
+ l_self_state_list_l_self_state_keys_5_step_ = L_self_state_list_L_self_state_keys_5_step_
+ l_self_state_list_l_self_state_keys_6_step_ = L_self_state_list_L_self_state_keys_6_step_
+ l_self_state_list_l_self_state_keys_7_step_ = L_self_state_list_L_self_state_keys_7_step_
+ l_self_state_list_l_self_state_keys_8_step_ = L_self_state_list_L_self_state_keys_8_step_
+ l_self_state_list_l_self_state_keys_9_step_ = L_self_state_list_L_self_state_keys_9_step_
+ l_self_state_list_l_self_state_keys_10_step_ = L_self_state_list_L_self_state_keys_10_step_
+ l_self_state_list_l_self_state_keys_11_step_ = L_self_state_list_L_self_state_keys_11_step_
+ l_self_state_list_l_self_state_keys_12_step_ = L_self_state_list_L_self_state_keys_12_step_
+ l_self_state_list_l_self_state_keys_13_step_ = L_self_state_list_L_self_state_keys_13_step_
+ l_self_state_list_l_self_state_keys_14_step_ = L_self_state_list_L_self_state_keys_14_step_
+ l_self_state_list_l_self_state_keys_15_step_ = L_self_state_list_L_self_state_keys_15_step_
+ l_self_state_list_l_self_state_keys_16_step_ = L_self_state_list_L_self_state_keys_16_step_
+ l_self_state_list_l_self_state_keys_17_step_ = L_self_state_list_L_self_state_keys_17_step_
+ l_self_state_list_l_self_state_keys_18_step_ = L_self_state_list_L_self_state_keys_18_step_
+ l_self_state_list_l_self_state_keys_19_step_ = L_self_state_list_L_self_state_keys_19_step_
+ l_self_state_list_l_self_state_keys_20_step_ = L_self_state_list_L_self_state_keys_20_step_
+ l_self_state_list_l_self_state_keys_21_step_ = L_self_state_list_L_self_state_keys_21_step_
+ l_self_state_list_l_self_state_keys_22_step_ = L_self_state_list_L_self_state_keys_22_step_
+ l_self_state_list_l_self_state_keys_23_step_ = L_self_state_list_L_self_state_keys_23_step_
+ l_self_state_list_l_self_state_keys_24_step_ = L_self_state_list_L_self_state_keys_24_step_
+ l_self_state_list_l_self_state_keys_25_step_ = L_self_state_list_L_self_state_keys_25_step_
+ l_self_state_list_l_self_state_keys_26_step_ = L_self_state_list_L_self_state_keys_26_step_
+ l_self_state_list_l_self_state_keys_27_step_ = L_self_state_list_L_self_state_keys_27_step_
+ l_self_state_list_l_self_state_keys_28_step_ = L_self_state_list_L_self_state_keys_28_step_
+ l_self_state_list_l_self_state_keys_29_step_ = L_self_state_list_L_self_state_keys_29_step_
+ l_self_state_list_l_self_state_keys_30_step_ = L_self_state_list_L_self_state_keys_30_step_
+ l_self_state_list_l_self_state_keys_31_step_ = L_self_state_list_L_self_state_keys_31_step_
+ l_self_state_list_l_self_state_keys_32_step_ = L_self_state_list_L_self_state_keys_32_step_
+ l_self_state_list_l_self_state_keys_33_step_ = L_self_state_list_L_self_state_keys_33_step_
+ l_self_state_list_l_self_state_keys_34_step_ = L_self_state_list_L_self_state_keys_34_step_
+ l_self_state_list_l_self_state_keys_35_step_ = L_self_state_list_L_self_state_keys_35_step_
+ l_self_state_list_l_self_state_keys_36_step_ = L_self_state_list_L_self_state_keys_36_step_
+ l_self_state_list_l_self_state_keys_37_step_ = L_self_state_list_L_self_state_keys_37_step_
+ l_self_state_list_l_self_state_keys_38_step_ = L_self_state_list_L_self_state_keys_38_step_
+ l_self_state_list_l_self_state_keys_39_step_ = L_self_state_list_L_self_state_keys_39_step_
+ l_self_state_list_l_self_state_keys_40_step_ = L_self_state_list_L_self_state_keys_40_step_
+ l_self_state_list_l_self_state_keys_41_step_ = L_self_state_list_L_self_state_keys_41_step_
+ l_self_state_list_l_self_state_keys_42_step_ = L_self_state_list_L_self_state_keys_42_step_
+ l_self_state_list_l_self_state_keys_43_step_ = L_self_state_list_L_self_state_keys_43_step_
+ l_self_state_list_l_self_state_keys_44_step_ = L_self_state_list_L_self_state_keys_44_step_
+ l_self_state_list_l_self_state_keys_45_step_ = L_self_state_list_L_self_state_keys_45_step_
+ l_self_state_list_l_self_state_keys_46_step_ = L_self_state_list_L_self_state_keys_46_step_
+ l_self_state_list_l_self_state_keys_47_step_ = L_self_state_list_L_self_state_keys_47_step_
+ l_self_state_list_l_self_state_keys_48_step_ = L_self_state_list_L_self_state_keys_48_step_
+ l_self_state_list_l_self_state_keys_49_step_ = L_self_state_list_L_self_state_keys_49_step_
+ l_self_state_list_l_self_state_keys_50_step_ = L_self_state_list_L_self_state_keys_50_step_
+ l_self_state_list_l_self_state_keys_51_step_ = L_self_state_list_L_self_state_keys_51_step_
+ l_self_state_list_l_self_state_keys_52_step_ = L_self_state_list_L_self_state_keys_52_step_
+ l_self_state_list_l_self_state_keys_53_step_ = L_self_state_list_L_self_state_keys_53_step_
+ l_self_state_list_l_self_state_keys_54_step_ = L_self_state_list_L_self_state_keys_54_step_
+ l_self_state_list_l_self_state_keys_55_step_ = L_self_state_list_L_self_state_keys_55_step_
+ l_self_state_list_l_self_state_keys_56_step_ = L_self_state_list_L_self_state_keys_56_step_
+ l_self_state_list_l_self_state_keys_57_step_ = L_self_state_list_L_self_state_keys_57_step_
+ l_self_state_list_l_self_state_keys_58_step_ = L_self_state_list_L_self_state_keys_58_step_
+ l_self_state_list_l_self_state_keys_59_step_ = L_self_state_list_L_self_state_keys_59_step_
+ l_self_state_list_l_self_state_keys_60_step_ = L_self_state_list_L_self_state_keys_60_step_
+ l_self_state_list_l_self_state_keys_61_step_ = L_self_state_list_L_self_state_keys_61_step_
+ l_self_state_list_l_self_state_keys_62_step_ = L_self_state_list_L_self_state_keys_62_step_
+ l_self_state_list_l_self_state_keys_63_step_ = L_self_state_list_L_self_state_keys_63_step_
+ l_self_state_list_l_self_state_keys_64_step_ = L_self_state_list_L_self_state_keys_64_step_
+ l_self_state_list_l_self_state_keys_65_step_ = L_self_state_list_L_self_state_keys_65_step_
+ l_self_state_list_l_self_state_keys_66_step_ = L_self_state_list_L_self_state_keys_66_step_
+ l_self_state_list_l_self_state_keys_67_step_ = L_self_state_list_L_self_state_keys_67_step_
+ l_self_state_list_l_self_state_keys_68_step_ = L_self_state_list_L_self_state_keys_68_step_
+ l_self_state_list_l_self_state_keys_69_step_ = L_self_state_list_L_self_state_keys_69_step_
+ l_self_state_list_l_self_state_keys_70_step_ = L_self_state_list_L_self_state_keys_70_step_
+ l_self_state_list_l_self_state_keys_71_step_ = L_self_state_list_L_self_state_keys_71_step_
+ l_self_state_list_l_self_state_keys_72_step_ = L_self_state_list_L_self_state_keys_72_step_
+ l_self_state_list_l_self_state_keys_73_step_ = L_self_state_list_L_self_state_keys_73_step_
+ l_self_state_list_l_self_state_keys_74_step_ = L_self_state_list_L_self_state_keys_74_step_
+ l_self_state_list_l_self_state_keys_75_step_ = L_self_state_list_L_self_state_keys_75_step_
+ l_self_state_list_l_self_state_keys_76_step_ = L_self_state_list_L_self_state_keys_76_step_
+ l_self_state_list_l_self_state_keys_77_step_ = L_self_state_list_L_self_state_keys_77_step_
+ l_self_state_list_l_self_state_keys_78_step_ = L_self_state_list_L_self_state_keys_78_step_
+ l_self_state_list_l_self_state_keys_79_step_ = L_self_state_list_L_self_state_keys_79_step_
+ l_self_state_list_l_self_state_keys_80_step_ = L_self_state_list_L_self_state_keys_80_step_
+ l_self_state_list_l_self_state_keys_81_step_ = L_self_state_list_L_self_state_keys_81_step_
+ l_self_state_list_l_self_state_keys_82_step_ = L_self_state_list_L_self_state_keys_82_step_
+ l_self_state_list_l_self_state_keys_83_step_ = L_self_state_list_L_self_state_keys_83_step_
+ l_self_state_list_l_self_state_keys_84_step_ = L_self_state_list_L_self_state_keys_84_step_
+ l_self_state_list_l_self_state_keys_85_step_ = L_self_state_list_L_self_state_keys_85_step_
+ l_self_state_list_l_self_state_keys_86_step_ = L_self_state_list_L_self_state_keys_86_step_
+ l_self_state_list_l_self_state_keys_87_step_ = L_self_state_list_L_self_state_keys_87_step_
+ l_self_state_list_l_self_state_keys_88_step_ = L_self_state_list_L_self_state_keys_88_step_
+ l_self_state_list_l_self_state_keys_89_step_ = L_self_state_list_L_self_state_keys_89_step_
+ l_self_state_list_l_self_state_keys_90_step_ = L_self_state_list_L_self_state_keys_90_step_
+ l_self_state_list_l_self_state_keys_91_step_ = L_self_state_list_L_self_state_keys_91_step_
+ l_self_state_list_l_self_state_keys_92_step_ = L_self_state_list_L_self_state_keys_92_step_
+ l_self_state_list_l_self_state_keys_93_step_ = L_self_state_list_L_self_state_keys_93_step_
+ l_self_state_list_l_self_state_keys_94_step_ = L_self_state_list_L_self_state_keys_94_step_
+ l_self_state_list_l_self_state_keys_95_step_ = L_self_state_list_L_self_state_keys_95_step_
+ l_self_state_list_l_self_state_keys_96_step_ = L_self_state_list_L_self_state_keys_96_step_
+ l_self_state_list_l_self_state_keys_97_step_ = L_self_state_list_L_self_state_keys_97_step_
+ l_self_state_list_l_self_state_keys_98_step_ = L_self_state_list_L_self_state_keys_98_step_
+ l_self_state_list_l_self_state_keys_99_step_ = L_self_state_list_L_self_state_keys_99_step_
+ l_self_state_list_l_self_state_keys_100_step_ = L_self_state_list_L_self_state_keys_100_step_
+ l_self_state_list_l_self_state_keys_101_step_ = L_self_state_list_L_self_state_keys_101_step_
+ l_self_state_list_l_self_state_keys_102_step_ = L_self_state_list_L_self_state_keys_102_step_
+ l_self_state_list_l_self_state_keys_103_step_ = L_self_state_list_L_self_state_keys_103_step_
+ l_self_state_list_l_self_state_keys_104_step_ = L_self_state_list_L_self_state_keys_104_step_
+ l_self_state_list_l_self_state_keys_105_step_ = L_self_state_list_L_self_state_keys_105_step_
+ l_self_state_list_l_self_state_keys_106_step_ = L_self_state_list_L_self_state_keys_106_step_
+ l_self_state_list_l_self_state_keys_107_step_ = L_self_state_list_L_self_state_keys_107_step_
+ l_self_state_list_l_self_state_keys_108_step_ = L_self_state_list_L_self_state_keys_108_step_
+ l_self_state_list_l_self_state_keys_109_step_ = L_self_state_list_L_self_state_keys_109_step_
+ l_self_state_list_l_self_state_keys_110_step_ = L_self_state_list_L_self_state_keys_110_step_
+ l_self_state_list_l_self_state_keys_111_step_ = L_self_state_list_L_self_state_keys_111_step_
+ l_self_state_list_l_self_state_keys_112_step_ = L_self_state_list_L_self_state_keys_112_step_
+ l_self_state_list_l_self_state_keys_113_step_ = L_self_state_list_L_self_state_keys_113_step_
+ l_self_state_list_l_self_state_keys_114_step_ = L_self_state_list_L_self_state_keys_114_step_
+ l_self_state_list_l_self_state_keys_115_step_ = L_self_state_list_L_self_state_keys_115_step_
+ l_self_state_list_l_self_state_keys_116_step_ = L_self_state_list_L_self_state_keys_116_step_
+ l_self_state_list_l_self_state_keys_117_step_ = L_self_state_list_L_self_state_keys_117_step_
+ l_self_state_list_l_self_state_keys_118_step_ = L_self_state_list_L_self_state_keys_118_step_
+ l_self_state_list_l_self_state_keys_119_step_ = L_self_state_list_L_self_state_keys_119_step_
+ l_self_state_list_l_self_state_keys_120_step_ = L_self_state_list_L_self_state_keys_120_step_
+ l_self_state_list_l_self_state_keys_121_step_ = L_self_state_list_L_self_state_keys_121_step_
+ l_self_state_list_l_self_state_keys_122_step_ = L_self_state_list_L_self_state_keys_122_step_
+ l_self_state_list_l_self_state_keys_123_step_ = L_self_state_list_L_self_state_keys_123_step_
+ l_self_state_list_l_self_state_keys_124_step_ = L_self_state_list_L_self_state_keys_124_step_
+ l_self_state_list_l_self_state_keys_125_step_ = L_self_state_list_L_self_state_keys_125_step_
+ l_self_state_list_l_self_state_keys_126_step_ = L_self_state_list_L_self_state_keys_126_step_
+ l_self_state_list_l_self_state_keys_127_step_ = L_self_state_list_L_self_state_keys_127_step_
+ l_self_state_list_l_self_state_keys_128_step_ = L_self_state_list_L_self_state_keys_128_step_
+ l_self_state_list_l_self_state_keys_129_step_ = L_self_state_list_L_self_state_keys_129_step_
+ l_self_state_list_l_self_state_keys_130_step_ = L_self_state_list_L_self_state_keys_130_step_
+ l_self_state_list_l_self_state_keys_131_step_ = L_self_state_list_L_self_state_keys_131_step_
+ l_self_state_list_l_self_state_keys_132_step_ = L_self_state_list_L_self_state_keys_132_step_
+ l_self_state_list_l_self_state_keys_133_step_ = L_self_state_list_L_self_state_keys_133_step_
+ l_self_state_list_l_self_state_keys_134_step_ = L_self_state_list_L_self_state_keys_134_step_
+ l_self_state_list_l_self_state_keys_135_step_ = L_self_state_list_L_self_state_keys_135_step_
+ l_self_state_list_l_self_state_keys_136_step_ = L_self_state_list_L_self_state_keys_136_step_
+ l_self_state_list_l_self_state_keys_137_step_ = L_self_state_list_L_self_state_keys_137_step_
+ l_self_state_list_l_self_state_keys_138_step_ = L_self_state_list_L_self_state_keys_138_step_
+ l_self_state_list_l_self_state_keys_139_step_ = L_self_state_list_L_self_state_keys_139_step_
+ l_self_state_list_l_self_state_keys_140_step_ = L_self_state_list_L_self_state_keys_140_step_
+ l_self_state_list_l_self_state_keys_141_step_ = L_self_state_list_L_self_state_keys_141_step_
+ l_self_state_list_l_self_state_keys_142_step_ = L_self_state_list_L_self_state_keys_142_step_
+ l_self_state_list_l_self_state_keys_143_step_ = L_self_state_list_L_self_state_keys_143_step_
+ l_self_state_list_l_self_state_keys_144_step_ = L_self_state_list_L_self_state_keys_144_step_
+ l_self_state_list_l_self_state_keys_145_step_ = L_self_state_list_L_self_state_keys_145_step_
+ l_self_state_list_l_self_state_keys_146_step_ = L_self_state_list_L_self_state_keys_146_step_
+ l_self_state_list_l_self_state_keys_147_step_ = L_self_state_list_L_self_state_keys_147_step_
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ _foreach_add_ = torch._foreach_add_([l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_,
l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, 
l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_], 1); _foreach_add_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ _foreach_lerp_ = torch._foreach_lerp_([l_self_state_list_l_self_state_keys_0_exp_avg_, l_self_state_list_l_self_state_keys_1_exp_avg_, l_self_state_list_l_self_state_keys_2_exp_avg_, l_self_state_list_l_self_state_keys_3_exp_avg_, l_self_state_list_l_self_state_keys_4_exp_avg_, l_self_state_list_l_self_state_keys_5_exp_avg_, l_self_state_list_l_self_state_keys_6_exp_avg_, l_self_state_list_l_self_state_keys_7_exp_avg_, l_self_state_list_l_self_state_keys_8_exp_avg_, l_self_state_list_l_self_state_keys_9_exp_avg_, l_self_state_list_l_self_state_keys_10_exp_avg_, l_self_state_list_l_self_state_keys_11_exp_avg_, l_self_state_list_l_self_state_keys_12_exp_avg_, l_self_state_list_l_self_state_keys_13_exp_avg_, l_self_state_list_l_self_state_keys_14_exp_avg_, l_self_state_list_l_self_state_keys_15_exp_avg_, l_self_state_list_l_self_state_keys_16_exp_avg_, l_self_state_list_l_self_state_keys_17_exp_avg_, l_self_state_list_l_self_state_keys_18_exp_avg_, l_self_state_list_l_self_state_keys_19_exp_avg_, l_self_state_list_l_self_state_keys_20_exp_avg_, l_self_state_list_l_self_state_keys_21_exp_avg_, l_self_state_list_l_self_state_keys_22_exp_avg_, l_self_state_list_l_self_state_keys_23_exp_avg_, l_self_state_list_l_self_state_keys_24_exp_avg_, l_self_state_list_l_self_state_keys_25_exp_avg_, l_self_state_list_l_self_state_keys_26_exp_avg_, l_self_state_list_l_self_state_keys_27_exp_avg_, l_self_state_list_l_self_state_keys_28_exp_avg_, l_self_state_list_l_self_state_keys_29_exp_avg_, l_self_state_list_l_self_state_keys_30_exp_avg_, l_self_state_list_l_self_state_keys_31_exp_avg_, l_self_state_list_l_self_state_keys_32_exp_avg_, l_self_state_list_l_self_state_keys_33_exp_avg_, l_self_state_list_l_self_state_keys_34_exp_avg_, l_self_state_list_l_self_state_keys_35_exp_avg_, l_self_state_list_l_self_state_keys_36_exp_avg_, l_self_state_list_l_self_state_keys_37_exp_avg_, l_self_state_list_l_self_state_keys_38_exp_avg_, l_self_state_list_l_self_state_keys_39_exp_avg_, l_self_state_list_l_self_state_keys_40_exp_avg_, l_self_state_list_l_self_state_keys_41_exp_avg_, l_self_state_list_l_self_state_keys_42_exp_avg_, l_self_state_list_l_self_state_keys_43_exp_avg_, l_self_state_list_l_self_state_keys_44_exp_avg_, l_self_state_list_l_self_state_keys_45_exp_avg_, l_self_state_list_l_self_state_keys_46_exp_avg_, l_self_state_list_l_self_state_keys_47_exp_avg_, l_self_state_list_l_self_state_keys_48_exp_avg_, l_self_state_list_l_self_state_keys_49_exp_avg_, l_self_state_list_l_self_state_keys_50_exp_avg_, l_self_state_list_l_self_state_keys_51_exp_avg_, l_self_state_list_l_self_state_keys_52_exp_avg_, l_self_state_list_l_self_state_keys_53_exp_avg_, l_self_state_list_l_self_state_keys_54_exp_avg_, l_self_state_list_l_self_state_keys_55_exp_avg_, l_self_state_list_l_self_state_keys_56_exp_avg_, l_self_state_list_l_self_state_keys_57_exp_avg_, l_self_state_list_l_self_state_keys_58_exp_avg_, l_self_state_list_l_self_state_keys_59_exp_avg_, l_self_state_list_l_self_state_keys_60_exp_avg_, l_self_state_list_l_self_state_keys_61_exp_avg_,
l_self_state_list_l_self_state_keys_62_exp_avg_, l_self_state_list_l_self_state_keys_63_exp_avg_, l_self_state_list_l_self_state_keys_64_exp_avg_, l_self_state_list_l_self_state_keys_65_exp_avg_, l_self_state_list_l_self_state_keys_66_exp_avg_, l_self_state_list_l_self_state_keys_67_exp_avg_, l_self_state_list_l_self_state_keys_68_exp_avg_, l_self_state_list_l_self_state_keys_69_exp_avg_, l_self_state_list_l_self_state_keys_70_exp_avg_, l_self_state_list_l_self_state_keys_71_exp_avg_, l_self_state_list_l_self_state_keys_72_exp_avg_, l_self_state_list_l_self_state_keys_73_exp_avg_, l_self_state_list_l_self_state_keys_74_exp_avg_, l_self_state_list_l_self_state_keys_75_exp_avg_, l_self_state_list_l_self_state_keys_76_exp_avg_, l_self_state_list_l_self_state_keys_77_exp_avg_, l_self_state_list_l_self_state_keys_78_exp_avg_, l_self_state_list_l_self_state_keys_79_exp_avg_, l_self_state_list_l_self_state_keys_80_exp_avg_, l_self_state_list_l_self_state_keys_81_exp_avg_, l_self_state_list_l_self_state_keys_82_exp_avg_, l_self_state_list_l_self_state_keys_83_exp_avg_, l_self_state_list_l_self_state_keys_84_exp_avg_, l_self_state_list_l_self_state_keys_85_exp_avg_, l_self_state_list_l_self_state_keys_86_exp_avg_, l_self_state_list_l_self_state_keys_87_exp_avg_, l_self_state_list_l_self_state_keys_88_exp_avg_, l_self_state_list_l_self_state_keys_89_exp_avg_, l_self_state_list_l_self_state_keys_90_exp_avg_, l_self_state_list_l_self_state_keys_91_exp_avg_, l_self_state_list_l_self_state_keys_92_exp_avg_, l_self_state_list_l_self_state_keys_93_exp_avg_, l_self_state_list_l_self_state_keys_94_exp_avg_, l_self_state_list_l_self_state_keys_95_exp_avg_, l_self_state_list_l_self_state_keys_96_exp_avg_, l_self_state_list_l_self_state_keys_97_exp_avg_, l_self_state_list_l_self_state_keys_98_exp_avg_, l_self_state_list_l_self_state_keys_99_exp_avg_, l_self_state_list_l_self_state_keys_100_exp_avg_, l_self_state_list_l_self_state_keys_101_exp_avg_, l_self_state_list_l_self_state_keys_102_exp_avg_, l_self_state_list_l_self_state_keys_103_exp_avg_, l_self_state_list_l_self_state_keys_104_exp_avg_, l_self_state_list_l_self_state_keys_105_exp_avg_, l_self_state_list_l_self_state_keys_106_exp_avg_, l_self_state_list_l_self_state_keys_107_exp_avg_, l_self_state_list_l_self_state_keys_108_exp_avg_, l_self_state_list_l_self_state_keys_109_exp_avg_, l_self_state_list_l_self_state_keys_110_exp_avg_, l_self_state_list_l_self_state_keys_111_exp_avg_, l_self_state_list_l_self_state_keys_112_exp_avg_, l_self_state_list_l_self_state_keys_113_exp_avg_, l_self_state_list_l_self_state_keys_114_exp_avg_, l_self_state_list_l_self_state_keys_115_exp_avg_, l_self_state_list_l_self_state_keys_116_exp_avg_, l_self_state_list_l_self_state_keys_117_exp_avg_, l_self_state_list_l_self_state_keys_118_exp_avg_, l_self_state_list_l_self_state_keys_119_exp_avg_, l_self_state_list_l_self_state_keys_120_exp_avg_, l_self_state_list_l_self_state_keys_121_exp_avg_, l_self_state_list_l_self_state_keys_122_exp_avg_, l_self_state_list_l_self_state_keys_123_exp_avg_, l_self_state_list_l_self_state_keys_124_exp_avg_, l_self_state_list_l_self_state_keys_125_exp_avg_, l_self_state_list_l_self_state_keys_126_exp_avg_, l_self_state_list_l_self_state_keys_127_exp_avg_, l_self_state_list_l_self_state_keys_128_exp_avg_, l_self_state_list_l_self_state_keys_129_exp_avg_, l_self_state_list_l_self_state_keys_130_exp_avg_, l_self_state_list_l_self_state_keys_131_exp_avg_, l_self_state_list_l_self_state_keys_132_exp_avg_, 
l_self_state_list_l_self_state_keys_133_exp_avg_, l_self_state_list_l_self_state_keys_134_exp_avg_, l_self_state_list_l_self_state_keys_135_exp_avg_, l_self_state_list_l_self_state_keys_136_exp_avg_, l_self_state_list_l_self_state_keys_137_exp_avg_, l_self_state_list_l_self_state_keys_138_exp_avg_, l_self_state_list_l_self_state_keys_139_exp_avg_, l_self_state_list_l_self_state_keys_140_exp_avg_, l_self_state_list_l_self_state_keys_141_exp_avg_, l_self_state_list_l_self_state_keys_142_exp_avg_, l_self_state_list_l_self_state_keys_143_exp_avg_, l_self_state_list_l_self_state_keys_144_exp_avg_, l_self_state_list_l_self_state_keys_145_exp_avg_, l_self_state_list_l_self_state_keys_146_exp_avg_, l_self_state_list_l_self_state_keys_147_exp_avg_], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, 
l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], 0.09999999999999998); _foreach_lerp_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2)
+ _foreach_mul_ = torch._foreach_mul_([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_,
l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_, l_self_state_list_l_self_state_keys_60_exp_avg_sq_, l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, l_self_state_list_l_self_state_keys_127_exp_avg_sq_, l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_], 0.999); _foreach_mul_ = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ _foreach_addcmul_ = torch._foreach_addcmul_([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_, l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_,
l_self_state_list_l_self_state_keys_60_exp_avg_sq_, l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_127_exp_avg_sq_, l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, 
l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], [l_self_param_groups_0_params_0_grad, l_self_param_groups_0_params_1_grad, l_self_param_groups_0_params_2_grad, l_self_param_groups_0_params_3_grad, l_self_param_groups_0_params_4_grad, l_self_param_groups_0_params_5_grad, l_self_param_groups_0_params_6_grad, l_self_param_groups_0_params_7_grad, 
l_self_param_groups_0_params_8_grad, l_self_param_groups_0_params_9_grad, l_self_param_groups_0_params_10_grad, l_self_param_groups_0_params_11_grad, l_self_param_groups_0_params_12_grad, l_self_param_groups_0_params_13_grad, l_self_param_groups_0_params_14_grad, l_self_param_groups_0_params_15_grad, l_self_param_groups_0_params_16_grad, l_self_param_groups_0_params_17_grad, l_self_param_groups_0_params_18_grad, l_self_param_groups_0_params_19_grad, l_self_param_groups_0_params_20_grad, l_self_param_groups_0_params_21_grad, l_self_param_groups_0_params_22_grad, l_self_param_groups_0_params_23_grad, l_self_param_groups_0_params_24_grad, l_self_param_groups_0_params_25_grad, l_self_param_groups_0_params_26_grad, l_self_param_groups_0_params_27_grad, l_self_param_groups_0_params_28_grad, l_self_param_groups_0_params_29_grad, l_self_param_groups_0_params_30_grad, l_self_param_groups_0_params_31_grad, l_self_param_groups_0_params_32_grad, l_self_param_groups_0_params_33_grad, l_self_param_groups_0_params_34_grad, l_self_param_groups_0_params_35_grad, l_self_param_groups_0_params_36_grad, l_self_param_groups_0_params_37_grad, l_self_param_groups_0_params_38_grad, l_self_param_groups_0_params_39_grad, l_self_param_groups_0_params_40_grad, l_self_param_groups_0_params_41_grad, l_self_param_groups_0_params_42_grad, l_self_param_groups_0_params_43_grad, l_self_param_groups_0_params_44_grad, l_self_param_groups_0_params_45_grad, l_self_param_groups_0_params_46_grad, l_self_param_groups_0_params_47_grad, l_self_param_groups_0_params_48_grad, l_self_param_groups_0_params_49_grad, l_self_param_groups_0_params_50_grad, l_self_param_groups_0_params_51_grad, l_self_param_groups_0_params_52_grad, l_self_param_groups_0_params_53_grad, l_self_param_groups_0_params_54_grad, l_self_param_groups_0_params_55_grad, l_self_param_groups_0_params_56_grad, l_self_param_groups_0_params_57_grad, l_self_param_groups_0_params_58_grad, l_self_param_groups_0_params_59_grad, l_self_param_groups_0_params_60_grad, l_self_param_groups_0_params_61_grad, l_self_param_groups_0_params_62_grad, l_self_param_groups_0_params_63_grad, l_self_param_groups_0_params_64_grad, l_self_param_groups_0_params_65_grad, l_self_param_groups_0_params_66_grad, l_self_param_groups_0_params_67_grad, l_self_param_groups_0_params_68_grad, l_self_param_groups_0_params_69_grad, l_self_param_groups_0_params_70_grad, l_self_param_groups_0_params_71_grad, l_self_param_groups_0_params_72_grad, l_self_param_groups_0_params_73_grad, l_self_param_groups_0_params_74_grad, l_self_param_groups_0_params_75_grad, l_self_param_groups_0_params_76_grad, l_self_param_groups_0_params_77_grad, l_self_param_groups_0_params_78_grad, l_self_param_groups_0_params_79_grad, l_self_param_groups_0_params_80_grad, l_self_param_groups_0_params_81_grad, l_self_param_groups_0_params_82_grad, l_self_param_groups_0_params_83_grad, l_self_param_groups_0_params_84_grad, l_self_param_groups_0_params_85_grad, l_self_param_groups_0_params_86_grad, l_self_param_groups_0_params_87_grad, l_self_param_groups_0_params_88_grad, l_self_param_groups_0_params_89_grad, l_self_param_groups_0_params_90_grad, l_self_param_groups_0_params_91_grad, l_self_param_groups_0_params_92_grad, l_self_param_groups_0_params_93_grad, l_self_param_groups_0_params_94_grad, l_self_param_groups_0_params_95_grad, l_self_param_groups_0_params_96_grad, l_self_param_groups_0_params_97_grad, l_self_param_groups_0_params_98_grad, l_self_param_groups_0_params_99_grad, l_self_param_groups_0_params_100_grad, 
l_self_param_groups_0_params_101_grad, l_self_param_groups_0_params_102_grad, l_self_param_groups_0_params_103_grad, l_self_param_groups_0_params_104_grad, l_self_param_groups_0_params_105_grad, l_self_param_groups_0_params_106_grad, l_self_param_groups_0_params_107_grad, l_self_param_groups_0_params_108_grad, l_self_param_groups_0_params_109_grad, l_self_param_groups_0_params_110_grad, l_self_param_groups_0_params_111_grad, l_self_param_groups_0_params_112_grad, l_self_param_groups_0_params_113_grad, l_self_param_groups_0_params_114_grad, l_self_param_groups_0_params_115_grad, l_self_param_groups_0_params_116_grad, l_self_param_groups_0_params_117_grad, l_self_param_groups_0_params_118_grad, l_self_param_groups_0_params_119_grad, l_self_param_groups_0_params_120_grad, l_self_param_groups_0_params_121_grad, l_self_param_groups_0_params_122_grad, l_self_param_groups_0_params_123_grad, l_self_param_groups_0_params_124_grad, l_self_param_groups_0_params_125_grad, l_self_param_groups_0_params_126_grad, l_self_param_groups_0_params_127_grad, l_self_param_groups_0_params_128_grad, l_self_param_groups_0_params_129_grad, l_self_param_groups_0_params_130_grad, l_self_param_groups_0_params_131_grad, l_self_param_groups_0_params_132_grad, l_self_param_groups_0_params_133_grad, l_self_param_groups_0_params_134_grad, l_self_param_groups_0_params_135_grad, l_self_param_groups_0_params_136_grad, l_self_param_groups_0_params_137_grad, l_self_param_groups_0_params_138_grad, l_self_param_groups_0_params_139_grad, l_self_param_groups_0_params_140_grad, l_self_param_groups_0_params_141_grad, l_self_param_groups_0_params_142_grad, l_self_param_groups_0_params_143_grad, l_self_param_groups_0_params_144_grad, l_self_param_groups_0_params_145_grad, l_self_param_groups_0_params_146_grad, l_self_param_groups_0_params_147_grad], 0.0010000000000000009); l_self_param_groups_0_params_0_grad = l_self_param_groups_0_params_1_grad = l_self_param_groups_0_params_2_grad = l_self_param_groups_0_params_3_grad = l_self_param_groups_0_params_4_grad = l_self_param_groups_0_params_5_grad = l_self_param_groups_0_params_6_grad = l_self_param_groups_0_params_7_grad = l_self_param_groups_0_params_8_grad = l_self_param_groups_0_params_9_grad = l_self_param_groups_0_params_10_grad = l_self_param_groups_0_params_11_grad = l_self_param_groups_0_params_12_grad = l_self_param_groups_0_params_13_grad = l_self_param_groups_0_params_14_grad = l_self_param_groups_0_params_15_grad = l_self_param_groups_0_params_16_grad = l_self_param_groups_0_params_17_grad = l_self_param_groups_0_params_18_grad = l_self_param_groups_0_params_19_grad = l_self_param_groups_0_params_20_grad = l_self_param_groups_0_params_21_grad = l_self_param_groups_0_params_22_grad = l_self_param_groups_0_params_23_grad = l_self_param_groups_0_params_24_grad = l_self_param_groups_0_params_25_grad = l_self_param_groups_0_params_26_grad = l_self_param_groups_0_params_27_grad = l_self_param_groups_0_params_28_grad = l_self_param_groups_0_params_29_grad = l_self_param_groups_0_params_30_grad = l_self_param_groups_0_params_31_grad = l_self_param_groups_0_params_32_grad = l_self_param_groups_0_params_33_grad = l_self_param_groups_0_params_34_grad = l_self_param_groups_0_params_35_grad = l_self_param_groups_0_params_36_grad = l_self_param_groups_0_params_37_grad = l_self_param_groups_0_params_38_grad = l_self_param_groups_0_params_39_grad = l_self_param_groups_0_params_40_grad = l_self_param_groups_0_params_41_grad = l_self_param_groups_0_params_42_grad = 
l_self_param_groups_0_params_43_grad = l_self_param_groups_0_params_44_grad = l_self_param_groups_0_params_45_grad = l_self_param_groups_0_params_46_grad = l_self_param_groups_0_params_47_grad = l_self_param_groups_0_params_48_grad = l_self_param_groups_0_params_49_grad = l_self_param_groups_0_params_50_grad = l_self_param_groups_0_params_51_grad = l_self_param_groups_0_params_52_grad = l_self_param_groups_0_params_53_grad = l_self_param_groups_0_params_54_grad = l_self_param_groups_0_params_55_grad = l_self_param_groups_0_params_56_grad = l_self_param_groups_0_params_57_grad = l_self_param_groups_0_params_58_grad = l_self_param_groups_0_params_59_grad = l_self_param_groups_0_params_60_grad = l_self_param_groups_0_params_61_grad = l_self_param_groups_0_params_62_grad = l_self_param_groups_0_params_63_grad = l_self_param_groups_0_params_64_grad = l_self_param_groups_0_params_65_grad = l_self_param_groups_0_params_66_grad = l_self_param_groups_0_params_67_grad = l_self_param_groups_0_params_68_grad = l_self_param_groups_0_params_69_grad = l_self_param_groups_0_params_70_grad = l_self_param_groups_0_params_71_grad = l_self_param_groups_0_params_72_grad = l_self_param_groups_0_params_73_grad = l_self_param_groups_0_params_74_grad = l_self_param_groups_0_params_75_grad = l_self_param_groups_0_params_76_grad = l_self_param_groups_0_params_77_grad = l_self_param_groups_0_params_78_grad = l_self_param_groups_0_params_79_grad = l_self_param_groups_0_params_80_grad = l_self_param_groups_0_params_81_grad = l_self_param_groups_0_params_82_grad = l_self_param_groups_0_params_83_grad = l_self_param_groups_0_params_84_grad = l_self_param_groups_0_params_85_grad = l_self_param_groups_0_params_86_grad = l_self_param_groups_0_params_87_grad = l_self_param_groups_0_params_88_grad = l_self_param_groups_0_params_89_grad = l_self_param_groups_0_params_90_grad = l_self_param_groups_0_params_91_grad = l_self_param_groups_0_params_92_grad = l_self_param_groups_0_params_93_grad = l_self_param_groups_0_params_94_grad = l_self_param_groups_0_params_95_grad = l_self_param_groups_0_params_96_grad = l_self_param_groups_0_params_97_grad = l_self_param_groups_0_params_98_grad = l_self_param_groups_0_params_99_grad = l_self_param_groups_0_params_100_grad = l_self_param_groups_0_params_101_grad = l_self_param_groups_0_params_102_grad = l_self_param_groups_0_params_103_grad = l_self_param_groups_0_params_104_grad = l_self_param_groups_0_params_105_grad = l_self_param_groups_0_params_106_grad = l_self_param_groups_0_params_107_grad = l_self_param_groups_0_params_108_grad = l_self_param_groups_0_params_109_grad = l_self_param_groups_0_params_110_grad = l_self_param_groups_0_params_111_grad = l_self_param_groups_0_params_112_grad = l_self_param_groups_0_params_113_grad = l_self_param_groups_0_params_114_grad = l_self_param_groups_0_params_115_grad = l_self_param_groups_0_params_116_grad = l_self_param_groups_0_params_117_grad = l_self_param_groups_0_params_118_grad = l_self_param_groups_0_params_119_grad = l_self_param_groups_0_params_120_grad = l_self_param_groups_0_params_121_grad = l_self_param_groups_0_params_122_grad = l_self_param_groups_0_params_123_grad = l_self_param_groups_0_params_124_grad = l_self_param_groups_0_params_125_grad = l_self_param_groups_0_params_126_grad = l_self_param_groups_0_params_127_grad = l_self_param_groups_0_params_128_grad = l_self_param_groups_0_params_129_grad = l_self_param_groups_0_params_130_grad = l_self_param_groups_0_params_131_grad = l_self_param_groups_0_params_132_grad = 
l_self_param_groups_0_params_133_grad = l_self_param_groups_0_params_134_grad = l_self_param_groups_0_params_135_grad = l_self_param_groups_0_params_136_grad = l_self_param_groups_0_params_137_grad = l_self_param_groups_0_params_138_grad = l_self_param_groups_0_params_139_grad = l_self_param_groups_0_params_140_grad = l_self_param_groups_0_params_141_grad = l_self_param_groups_0_params_142_grad = l_self_param_groups_0_params_143_grad = l_self_param_groups_0_params_144_grad = l_self_param_groups_0_params_145_grad = l_self_param_groups_0_params_146_grad = l_self_param_groups_0_params_147_grad = _foreach_addcmul_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = torch._foreach_pow(0.9, [l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, 
l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_, l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, 
l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_]) + getitem_592: "f32[][]cuda:0" = _foreach_pow[0] + getitem_593: "f32[][]cuda:0" = _foreach_pow[1] + getitem_594: "f32[][]cuda:0" = _foreach_pow[2] + getitem_595: "f32[][]cuda:0" = _foreach_pow[3] + getitem_596: "f32[][]cuda:0" = _foreach_pow[4] + getitem_597: "f32[][]cuda:0" = _foreach_pow[5] + getitem_598: "f32[][]cuda:0" = _foreach_pow[6] + getitem_599: "f32[][]cuda:0" = _foreach_pow[7] + getitem_600: "f32[][]cuda:0" = _foreach_pow[8] + getitem_601: "f32[][]cuda:0" = _foreach_pow[9] + getitem_602: "f32[][]cuda:0" = _foreach_pow[10] + getitem_603: "f32[][]cuda:0" = _foreach_pow[11] + getitem_604: "f32[][]cuda:0" = _foreach_pow[12] + getitem_605: "f32[][]cuda:0" = _foreach_pow[13] + getitem_606: "f32[][]cuda:0" = _foreach_pow[14] + getitem_607: "f32[][]cuda:0" = _foreach_pow[15] + getitem_608: "f32[][]cuda:0" = _foreach_pow[16] + getitem_609: "f32[][]cuda:0" = _foreach_pow[17] + getitem_610: "f32[][]cuda:0" = _foreach_pow[18] + getitem_611: "f32[][]cuda:0" = _foreach_pow[19] + getitem_612: "f32[][]cuda:0" = _foreach_pow[20] + getitem_613: "f32[][]cuda:0" = _foreach_pow[21] + getitem_614: "f32[][]cuda:0" = _foreach_pow[22] + getitem_615: "f32[][]cuda:0" = _foreach_pow[23] + getitem_616: "f32[][]cuda:0" = _foreach_pow[24] + getitem_617: "f32[][]cuda:0" = _foreach_pow[25] + getitem_618: "f32[][]cuda:0" = _foreach_pow[26] + getitem_619: "f32[][]cuda:0" = _foreach_pow[27] + getitem_620: "f32[][]cuda:0" = _foreach_pow[28] + getitem_621: "f32[][]cuda:0" = _foreach_pow[29] + getitem_622: "f32[][]cuda:0" = _foreach_pow[30] + getitem_623: "f32[][]cuda:0" = _foreach_pow[31] + getitem_624: "f32[][]cuda:0" = _foreach_pow[32] + getitem_625: "f32[][]cuda:0" = _foreach_pow[33] + getitem_626: "f32[][]cuda:0" = _foreach_pow[34] + getitem_627: "f32[][]cuda:0" = _foreach_pow[35] + getitem_628: "f32[][]cuda:0" = _foreach_pow[36] + getitem_629: "f32[][]cuda:0" = _foreach_pow[37] + getitem_630: "f32[][]cuda:0" = _foreach_pow[38] + getitem_631: "f32[][]cuda:0" = _foreach_pow[39] + getitem_632: "f32[][]cuda:0" = _foreach_pow[40] + getitem_633: "f32[][]cuda:0" = _foreach_pow[41] + getitem_634: "f32[][]cuda:0" = _foreach_pow[42] + getitem_635: "f32[][]cuda:0" = _foreach_pow[43] + getitem_636: "f32[][]cuda:0" = _foreach_pow[44] + getitem_637: "f32[][]cuda:0" = _foreach_pow[45] + getitem_638: "f32[][]cuda:0" = _foreach_pow[46] + getitem_639: "f32[][]cuda:0" = _foreach_pow[47] + getitem_640: "f32[][]cuda:0" = _foreach_pow[48] + getitem_641: "f32[][]cuda:0" = _foreach_pow[49] + getitem_642: "f32[][]cuda:0" = _foreach_pow[50] + getitem_643: "f32[][]cuda:0" = _foreach_pow[51] + getitem_644: "f32[][]cuda:0" = _foreach_pow[52] + getitem_645: "f32[][]cuda:0" = _foreach_pow[53] + getitem_646: "f32[][]cuda:0" = _foreach_pow[54] + getitem_647: "f32[][]cuda:0" = _foreach_pow[55] + getitem_648: "f32[][]cuda:0" = _foreach_pow[56] + getitem_649: "f32[][]cuda:0" = _foreach_pow[57] + getitem_650: "f32[][]cuda:0" = _foreach_pow[58] + 
getitem_651: "f32[][]cuda:0" = _foreach_pow[59] + getitem_652: "f32[][]cuda:0" = _foreach_pow[60] + getitem_653: "f32[][]cuda:0" = _foreach_pow[61] + getitem_654: "f32[][]cuda:0" = _foreach_pow[62] + getitem_655: "f32[][]cuda:0" = _foreach_pow[63] + getitem_656: "f32[][]cuda:0" = _foreach_pow[64] + getitem_657: "f32[][]cuda:0" = _foreach_pow[65] + getitem_658: "f32[][]cuda:0" = _foreach_pow[66] + getitem_659: "f32[][]cuda:0" = _foreach_pow[67] + getitem_660: "f32[][]cuda:0" = _foreach_pow[68] + getitem_661: "f32[][]cuda:0" = _foreach_pow[69] + getitem_662: "f32[][]cuda:0" = _foreach_pow[70] + getitem_663: "f32[][]cuda:0" = _foreach_pow[71] + getitem_664: "f32[][]cuda:0" = _foreach_pow[72] + getitem_665: "f32[][]cuda:0" = _foreach_pow[73] + getitem_666: "f32[][]cuda:0" = _foreach_pow[74] + getitem_667: "f32[][]cuda:0" = _foreach_pow[75] + getitem_668: "f32[][]cuda:0" = _foreach_pow[76] + getitem_669: "f32[][]cuda:0" = _foreach_pow[77] + getitem_670: "f32[][]cuda:0" = _foreach_pow[78] + getitem_671: "f32[][]cuda:0" = _foreach_pow[79] + getitem_672: "f32[][]cuda:0" = _foreach_pow[80] + getitem_673: "f32[][]cuda:0" = _foreach_pow[81] + getitem_674: "f32[][]cuda:0" = _foreach_pow[82] + getitem_675: "f32[][]cuda:0" = _foreach_pow[83] + getitem_676: "f32[][]cuda:0" = _foreach_pow[84] + getitem_677: "f32[][]cuda:0" = _foreach_pow[85] + getitem_678: "f32[][]cuda:0" = _foreach_pow[86] + getitem_679: "f32[][]cuda:0" = _foreach_pow[87] + getitem_680: "f32[][]cuda:0" = _foreach_pow[88] + getitem_681: "f32[][]cuda:0" = _foreach_pow[89] + getitem_682: "f32[][]cuda:0" = _foreach_pow[90] + getitem_683: "f32[][]cuda:0" = _foreach_pow[91] + getitem_684: "f32[][]cuda:0" = _foreach_pow[92] + getitem_685: "f32[][]cuda:0" = _foreach_pow[93] + getitem_686: "f32[][]cuda:0" = _foreach_pow[94] + getitem_687: "f32[][]cuda:0" = _foreach_pow[95] + getitem_688: "f32[][]cuda:0" = _foreach_pow[96] + getitem_689: "f32[][]cuda:0" = _foreach_pow[97] + getitem_690: "f32[][]cuda:0" = _foreach_pow[98] + getitem_691: "f32[][]cuda:0" = _foreach_pow[99] + getitem_692: "f32[][]cuda:0" = _foreach_pow[100] + getitem_693: "f32[][]cuda:0" = _foreach_pow[101] + getitem_694: "f32[][]cuda:0" = _foreach_pow[102] + getitem_695: "f32[][]cuda:0" = _foreach_pow[103] + getitem_696: "f32[][]cuda:0" = _foreach_pow[104] + getitem_697: "f32[][]cuda:0" = _foreach_pow[105] + getitem_698: "f32[][]cuda:0" = _foreach_pow[106] + getitem_699: "f32[][]cuda:0" = _foreach_pow[107] + getitem_700: "f32[][]cuda:0" = _foreach_pow[108] + getitem_701: "f32[][]cuda:0" = _foreach_pow[109] + getitem_702: "f32[][]cuda:0" = _foreach_pow[110] + getitem_703: "f32[][]cuda:0" = _foreach_pow[111] + getitem_704: "f32[][]cuda:0" = _foreach_pow[112] + getitem_705: "f32[][]cuda:0" = _foreach_pow[113] + getitem_706: "f32[][]cuda:0" = _foreach_pow[114] + getitem_707: "f32[][]cuda:0" = _foreach_pow[115] + getitem_708: "f32[][]cuda:0" = _foreach_pow[116] + getitem_709: "f32[][]cuda:0" = _foreach_pow[117] + getitem_710: "f32[][]cuda:0" = _foreach_pow[118] + getitem_711: "f32[][]cuda:0" = _foreach_pow[119] + getitem_712: "f32[][]cuda:0" = _foreach_pow[120] + getitem_713: "f32[][]cuda:0" = _foreach_pow[121] + getitem_714: "f32[][]cuda:0" = _foreach_pow[122] + getitem_715: "f32[][]cuda:0" = _foreach_pow[123] + getitem_716: "f32[][]cuda:0" = _foreach_pow[124] + getitem_717: "f32[][]cuda:0" = _foreach_pow[125] + getitem_718: "f32[][]cuda:0" = _foreach_pow[126] + getitem_719: "f32[][]cuda:0" = _foreach_pow[127] + getitem_720: "f32[][]cuda:0" = _foreach_pow[128] + getitem_721: 
"f32[][]cuda:0" = _foreach_pow[129] + getitem_722: "f32[][]cuda:0" = _foreach_pow[130] + getitem_723: "f32[][]cuda:0" = _foreach_pow[131] + getitem_724: "f32[][]cuda:0" = _foreach_pow[132] + getitem_725: "f32[][]cuda:0" = _foreach_pow[133] + getitem_726: "f32[][]cuda:0" = _foreach_pow[134] + getitem_727: "f32[][]cuda:0" = _foreach_pow[135] + getitem_728: "f32[][]cuda:0" = _foreach_pow[136] + getitem_729: "f32[][]cuda:0" = _foreach_pow[137] + getitem_730: "f32[][]cuda:0" = _foreach_pow[138] + getitem_731: "f32[][]cuda:0" = _foreach_pow[139] + getitem_732: "f32[][]cuda:0" = _foreach_pow[140] + getitem_733: "f32[][]cuda:0" = _foreach_pow[141] + getitem_734: "f32[][]cuda:0" = _foreach_pow[142] + getitem_735: "f32[][]cuda:0" = _foreach_pow[143] + getitem_736: "f32[][]cuda:0" = _foreach_pow[144] + getitem_737: "f32[][]cuda:0" = _foreach_pow[145] + getitem_738: "f32[][]cuda:0" = _foreach_pow[146] + getitem_739: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch._foreach_pow(0.999, [l_self_state_list_l_self_state_keys_0_step_, l_self_state_list_l_self_state_keys_1_step_, l_self_state_list_l_self_state_keys_2_step_, l_self_state_list_l_self_state_keys_3_step_, l_self_state_list_l_self_state_keys_4_step_, l_self_state_list_l_self_state_keys_5_step_, l_self_state_list_l_self_state_keys_6_step_, l_self_state_list_l_self_state_keys_7_step_, l_self_state_list_l_self_state_keys_8_step_, l_self_state_list_l_self_state_keys_9_step_, l_self_state_list_l_self_state_keys_10_step_, l_self_state_list_l_self_state_keys_11_step_, l_self_state_list_l_self_state_keys_12_step_, l_self_state_list_l_self_state_keys_13_step_, l_self_state_list_l_self_state_keys_14_step_, l_self_state_list_l_self_state_keys_15_step_, l_self_state_list_l_self_state_keys_16_step_, l_self_state_list_l_self_state_keys_17_step_, l_self_state_list_l_self_state_keys_18_step_, l_self_state_list_l_self_state_keys_19_step_, l_self_state_list_l_self_state_keys_20_step_, l_self_state_list_l_self_state_keys_21_step_, l_self_state_list_l_self_state_keys_22_step_, l_self_state_list_l_self_state_keys_23_step_, l_self_state_list_l_self_state_keys_24_step_, l_self_state_list_l_self_state_keys_25_step_, l_self_state_list_l_self_state_keys_26_step_, l_self_state_list_l_self_state_keys_27_step_, l_self_state_list_l_self_state_keys_28_step_, l_self_state_list_l_self_state_keys_29_step_, l_self_state_list_l_self_state_keys_30_step_, l_self_state_list_l_self_state_keys_31_step_, l_self_state_list_l_self_state_keys_32_step_, l_self_state_list_l_self_state_keys_33_step_, l_self_state_list_l_self_state_keys_34_step_, l_self_state_list_l_self_state_keys_35_step_, l_self_state_list_l_self_state_keys_36_step_, l_self_state_list_l_self_state_keys_37_step_, l_self_state_list_l_self_state_keys_38_step_, l_self_state_list_l_self_state_keys_39_step_, l_self_state_list_l_self_state_keys_40_step_, l_self_state_list_l_self_state_keys_41_step_, l_self_state_list_l_self_state_keys_42_step_, l_self_state_list_l_self_state_keys_43_step_, l_self_state_list_l_self_state_keys_44_step_, l_self_state_list_l_self_state_keys_45_step_, l_self_state_list_l_self_state_keys_46_step_, l_self_state_list_l_self_state_keys_47_step_, l_self_state_list_l_self_state_keys_48_step_, l_self_state_list_l_self_state_keys_49_step_, l_self_state_list_l_self_state_keys_50_step_, 
l_self_state_list_l_self_state_keys_51_step_, l_self_state_list_l_self_state_keys_52_step_, l_self_state_list_l_self_state_keys_53_step_, l_self_state_list_l_self_state_keys_54_step_, l_self_state_list_l_self_state_keys_55_step_, l_self_state_list_l_self_state_keys_56_step_, l_self_state_list_l_self_state_keys_57_step_, l_self_state_list_l_self_state_keys_58_step_, l_self_state_list_l_self_state_keys_59_step_, l_self_state_list_l_self_state_keys_60_step_, l_self_state_list_l_self_state_keys_61_step_, l_self_state_list_l_self_state_keys_62_step_, l_self_state_list_l_self_state_keys_63_step_, l_self_state_list_l_self_state_keys_64_step_, l_self_state_list_l_self_state_keys_65_step_, l_self_state_list_l_self_state_keys_66_step_, l_self_state_list_l_self_state_keys_67_step_, l_self_state_list_l_self_state_keys_68_step_, l_self_state_list_l_self_state_keys_69_step_, l_self_state_list_l_self_state_keys_70_step_, l_self_state_list_l_self_state_keys_71_step_, l_self_state_list_l_self_state_keys_72_step_, l_self_state_list_l_self_state_keys_73_step_, l_self_state_list_l_self_state_keys_74_step_, l_self_state_list_l_self_state_keys_75_step_, l_self_state_list_l_self_state_keys_76_step_, l_self_state_list_l_self_state_keys_77_step_, l_self_state_list_l_self_state_keys_78_step_, l_self_state_list_l_self_state_keys_79_step_, l_self_state_list_l_self_state_keys_80_step_, l_self_state_list_l_self_state_keys_81_step_, l_self_state_list_l_self_state_keys_82_step_, l_self_state_list_l_self_state_keys_83_step_, l_self_state_list_l_self_state_keys_84_step_, l_self_state_list_l_self_state_keys_85_step_, l_self_state_list_l_self_state_keys_86_step_, l_self_state_list_l_self_state_keys_87_step_, l_self_state_list_l_self_state_keys_88_step_, l_self_state_list_l_self_state_keys_89_step_, l_self_state_list_l_self_state_keys_90_step_, l_self_state_list_l_self_state_keys_91_step_, l_self_state_list_l_self_state_keys_92_step_, l_self_state_list_l_self_state_keys_93_step_, l_self_state_list_l_self_state_keys_94_step_, l_self_state_list_l_self_state_keys_95_step_, l_self_state_list_l_self_state_keys_96_step_, l_self_state_list_l_self_state_keys_97_step_, l_self_state_list_l_self_state_keys_98_step_, l_self_state_list_l_self_state_keys_99_step_, l_self_state_list_l_self_state_keys_100_step_, l_self_state_list_l_self_state_keys_101_step_, l_self_state_list_l_self_state_keys_102_step_, l_self_state_list_l_self_state_keys_103_step_, l_self_state_list_l_self_state_keys_104_step_, l_self_state_list_l_self_state_keys_105_step_, l_self_state_list_l_self_state_keys_106_step_, l_self_state_list_l_self_state_keys_107_step_, l_self_state_list_l_self_state_keys_108_step_, l_self_state_list_l_self_state_keys_109_step_, l_self_state_list_l_self_state_keys_110_step_, l_self_state_list_l_self_state_keys_111_step_, l_self_state_list_l_self_state_keys_112_step_, l_self_state_list_l_self_state_keys_113_step_, l_self_state_list_l_self_state_keys_114_step_, l_self_state_list_l_self_state_keys_115_step_, l_self_state_list_l_self_state_keys_116_step_, l_self_state_list_l_self_state_keys_117_step_, l_self_state_list_l_self_state_keys_118_step_, l_self_state_list_l_self_state_keys_119_step_, l_self_state_list_l_self_state_keys_120_step_, l_self_state_list_l_self_state_keys_121_step_, l_self_state_list_l_self_state_keys_122_step_, l_self_state_list_l_self_state_keys_123_step_, l_self_state_list_l_self_state_keys_124_step_, l_self_state_list_l_self_state_keys_125_step_, l_self_state_list_l_self_state_keys_126_step_, 
l_self_state_list_l_self_state_keys_127_step_, l_self_state_list_l_self_state_keys_128_step_, l_self_state_list_l_self_state_keys_129_step_, l_self_state_list_l_self_state_keys_130_step_, l_self_state_list_l_self_state_keys_131_step_, l_self_state_list_l_self_state_keys_132_step_, l_self_state_list_l_self_state_keys_133_step_, l_self_state_list_l_self_state_keys_134_step_, l_self_state_list_l_self_state_keys_135_step_, l_self_state_list_l_self_state_keys_136_step_, l_self_state_list_l_self_state_keys_137_step_, l_self_state_list_l_self_state_keys_138_step_, l_self_state_list_l_self_state_keys_139_step_, l_self_state_list_l_self_state_keys_140_step_, l_self_state_list_l_self_state_keys_141_step_, l_self_state_list_l_self_state_keys_142_step_, l_self_state_list_l_self_state_keys_143_step_, l_self_state_list_l_self_state_keys_144_step_, l_self_state_list_l_self_state_keys_145_step_, l_self_state_list_l_self_state_keys_146_step_, l_self_state_list_l_self_state_keys_147_step_]); l_self_state_list_l_self_state_keys_0_step_ = l_self_state_list_l_self_state_keys_1_step_ = l_self_state_list_l_self_state_keys_2_step_ = l_self_state_list_l_self_state_keys_3_step_ = l_self_state_list_l_self_state_keys_4_step_ = l_self_state_list_l_self_state_keys_5_step_ = l_self_state_list_l_self_state_keys_6_step_ = l_self_state_list_l_self_state_keys_7_step_ = l_self_state_list_l_self_state_keys_8_step_ = l_self_state_list_l_self_state_keys_9_step_ = l_self_state_list_l_self_state_keys_10_step_ = l_self_state_list_l_self_state_keys_11_step_ = l_self_state_list_l_self_state_keys_12_step_ = l_self_state_list_l_self_state_keys_13_step_ = l_self_state_list_l_self_state_keys_14_step_ = l_self_state_list_l_self_state_keys_15_step_ = l_self_state_list_l_self_state_keys_16_step_ = l_self_state_list_l_self_state_keys_17_step_ = l_self_state_list_l_self_state_keys_18_step_ = l_self_state_list_l_self_state_keys_19_step_ = l_self_state_list_l_self_state_keys_20_step_ = l_self_state_list_l_self_state_keys_21_step_ = l_self_state_list_l_self_state_keys_22_step_ = l_self_state_list_l_self_state_keys_23_step_ = l_self_state_list_l_self_state_keys_24_step_ = l_self_state_list_l_self_state_keys_25_step_ = l_self_state_list_l_self_state_keys_26_step_ = l_self_state_list_l_self_state_keys_27_step_ = l_self_state_list_l_self_state_keys_28_step_ = l_self_state_list_l_self_state_keys_29_step_ = l_self_state_list_l_self_state_keys_30_step_ = l_self_state_list_l_self_state_keys_31_step_ = l_self_state_list_l_self_state_keys_32_step_ = l_self_state_list_l_self_state_keys_33_step_ = l_self_state_list_l_self_state_keys_34_step_ = l_self_state_list_l_self_state_keys_35_step_ = l_self_state_list_l_self_state_keys_36_step_ = l_self_state_list_l_self_state_keys_37_step_ = l_self_state_list_l_self_state_keys_38_step_ = l_self_state_list_l_self_state_keys_39_step_ = l_self_state_list_l_self_state_keys_40_step_ = l_self_state_list_l_self_state_keys_41_step_ = l_self_state_list_l_self_state_keys_42_step_ = l_self_state_list_l_self_state_keys_43_step_ = l_self_state_list_l_self_state_keys_44_step_ = l_self_state_list_l_self_state_keys_45_step_ = l_self_state_list_l_self_state_keys_46_step_ = l_self_state_list_l_self_state_keys_47_step_ = l_self_state_list_l_self_state_keys_48_step_ = l_self_state_list_l_self_state_keys_49_step_ = l_self_state_list_l_self_state_keys_50_step_ = l_self_state_list_l_self_state_keys_51_step_ = l_self_state_list_l_self_state_keys_52_step_ = l_self_state_list_l_self_state_keys_53_step_ = 
l_self_state_list_l_self_state_keys_54_step_ = l_self_state_list_l_self_state_keys_55_step_ = l_self_state_list_l_self_state_keys_56_step_ = l_self_state_list_l_self_state_keys_57_step_ = l_self_state_list_l_self_state_keys_58_step_ = l_self_state_list_l_self_state_keys_59_step_ = l_self_state_list_l_self_state_keys_60_step_ = l_self_state_list_l_self_state_keys_61_step_ = l_self_state_list_l_self_state_keys_62_step_ = l_self_state_list_l_self_state_keys_63_step_ = l_self_state_list_l_self_state_keys_64_step_ = l_self_state_list_l_self_state_keys_65_step_ = l_self_state_list_l_self_state_keys_66_step_ = l_self_state_list_l_self_state_keys_67_step_ = l_self_state_list_l_self_state_keys_68_step_ = l_self_state_list_l_self_state_keys_69_step_ = l_self_state_list_l_self_state_keys_70_step_ = l_self_state_list_l_self_state_keys_71_step_ = l_self_state_list_l_self_state_keys_72_step_ = l_self_state_list_l_self_state_keys_73_step_ = l_self_state_list_l_self_state_keys_74_step_ = l_self_state_list_l_self_state_keys_75_step_ = l_self_state_list_l_self_state_keys_76_step_ = l_self_state_list_l_self_state_keys_77_step_ = l_self_state_list_l_self_state_keys_78_step_ = l_self_state_list_l_self_state_keys_79_step_ = l_self_state_list_l_self_state_keys_80_step_ = l_self_state_list_l_self_state_keys_81_step_ = l_self_state_list_l_self_state_keys_82_step_ = l_self_state_list_l_self_state_keys_83_step_ = l_self_state_list_l_self_state_keys_84_step_ = l_self_state_list_l_self_state_keys_85_step_ = l_self_state_list_l_self_state_keys_86_step_ = l_self_state_list_l_self_state_keys_87_step_ = l_self_state_list_l_self_state_keys_88_step_ = l_self_state_list_l_self_state_keys_89_step_ = l_self_state_list_l_self_state_keys_90_step_ = l_self_state_list_l_self_state_keys_91_step_ = l_self_state_list_l_self_state_keys_92_step_ = l_self_state_list_l_self_state_keys_93_step_ = l_self_state_list_l_self_state_keys_94_step_ = l_self_state_list_l_self_state_keys_95_step_ = l_self_state_list_l_self_state_keys_96_step_ = l_self_state_list_l_self_state_keys_97_step_ = l_self_state_list_l_self_state_keys_98_step_ = l_self_state_list_l_self_state_keys_99_step_ = l_self_state_list_l_self_state_keys_100_step_ = l_self_state_list_l_self_state_keys_101_step_ = l_self_state_list_l_self_state_keys_102_step_ = l_self_state_list_l_self_state_keys_103_step_ = l_self_state_list_l_self_state_keys_104_step_ = l_self_state_list_l_self_state_keys_105_step_ = l_self_state_list_l_self_state_keys_106_step_ = l_self_state_list_l_self_state_keys_107_step_ = l_self_state_list_l_self_state_keys_108_step_ = l_self_state_list_l_self_state_keys_109_step_ = l_self_state_list_l_self_state_keys_110_step_ = l_self_state_list_l_self_state_keys_111_step_ = l_self_state_list_l_self_state_keys_112_step_ = l_self_state_list_l_self_state_keys_113_step_ = l_self_state_list_l_self_state_keys_114_step_ = l_self_state_list_l_self_state_keys_115_step_ = l_self_state_list_l_self_state_keys_116_step_ = l_self_state_list_l_self_state_keys_117_step_ = l_self_state_list_l_self_state_keys_118_step_ = l_self_state_list_l_self_state_keys_119_step_ = l_self_state_list_l_self_state_keys_120_step_ = l_self_state_list_l_self_state_keys_121_step_ = l_self_state_list_l_self_state_keys_122_step_ = l_self_state_list_l_self_state_keys_123_step_ = l_self_state_list_l_self_state_keys_124_step_ = l_self_state_list_l_self_state_keys_125_step_ = l_self_state_list_l_self_state_keys_126_step_ = l_self_state_list_l_self_state_keys_127_step_ = l_self_state_list_l_self_state_keys_128_step_ = 
l_self_state_list_l_self_state_keys_129_step_ = l_self_state_list_l_self_state_keys_130_step_ = l_self_state_list_l_self_state_keys_131_step_ = l_self_state_list_l_self_state_keys_132_step_ = l_self_state_list_l_self_state_keys_133_step_ = l_self_state_list_l_self_state_keys_134_step_ = l_self_state_list_l_self_state_keys_135_step_ = l_self_state_list_l_self_state_keys_136_step_ = l_self_state_list_l_self_state_keys_137_step_ = l_self_state_list_l_self_state_keys_138_step_ = l_self_state_list_l_self_state_keys_139_step_ = l_self_state_list_l_self_state_keys_140_step_ = l_self_state_list_l_self_state_keys_141_step_ = l_self_state_list_l_self_state_keys_142_step_ = l_self_state_list_l_self_state_keys_143_step_ = l_self_state_list_l_self_state_keys_144_step_ = l_self_state_list_l_self_state_keys_145_step_ = l_self_state_list_l_self_state_keys_146_step_ = l_self_state_list_l_self_state_keys_147_step_ = None + getitem_740: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_741: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_742: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_743: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_744: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_745: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_746: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_747: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_748: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_749: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_750: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_751: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_752: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_753: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_754: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_755: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_756: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_757: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_758: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_759: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_760: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_761: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_762: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_763: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_764: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_765: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_766: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_767: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_768: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_769: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_770: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_771: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_772: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_773: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_774: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_775: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_776: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_777: "f32[][]cuda:0" = _foreach_pow_1[37] + getitem_778: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_779: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_780: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_781: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_782: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_783: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_784: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_785: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_786: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_787: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_788: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_789: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_790: "f32[][]cuda:0" = 
_foreach_pow_1[50] + getitem_791: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_792: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_793: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_794: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_795: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_796: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_797: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_798: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_799: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_800: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_801: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_802: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_803: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_804: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_805: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_806: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_807: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_808: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_809: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_810: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_811: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_812: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_813: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_814: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_815: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_816: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_817: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_818: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_819: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_820: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_821: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_822: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_823: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_824: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_825: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_826: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_827: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_828: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_829: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_830: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_831: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_832: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_833: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_834: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_835: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_836: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_837: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_838: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_839: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_840: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_841: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_842: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_843: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_844: "f32[][]cuda:0" = _foreach_pow_1[104] + getitem_845: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_846: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_847: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_848: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_849: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_850: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_851: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_852: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_853: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_854: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_855: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_856: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_857: "f32[][]cuda:0" = _foreach_pow_1[117] + getitem_858: "f32[][]cuda:0" = 
_foreach_pow_1[118] + getitem_859: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_860: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_861: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_862: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_863: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_864: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_865: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_866: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_867: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_868: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_869: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_870: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_871: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_872: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_873: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_874: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_875: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_876: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_877: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_878: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_879: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_880: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_881: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_882: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_883: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_884: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_885: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_886: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_887: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_ = torch._foreach_sub_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, 
getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739), 1); _foreach_sub_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction2, 1) + _foreach_sub__1 = torch._foreach_sub_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887), 1); _foreach_sub__1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg_ = torch._foreach_neg_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, 
getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); _foreach_neg_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div_ = torch._foreach_div_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739), 0.01); _foreach_div_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal_ = torch._foreach_reciprocal_((getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, 
getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739)); _foreach_reciprocal_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt_ = torch._foreach_sqrt_((getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, 
getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); _foreach_sqrt_ = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt = torch._foreach_sqrt([l_self_state_list_l_self_state_keys_0_exp_avg_sq_, l_self_state_list_l_self_state_keys_1_exp_avg_sq_, l_self_state_list_l_self_state_keys_2_exp_avg_sq_, l_self_state_list_l_self_state_keys_3_exp_avg_sq_, l_self_state_list_l_self_state_keys_4_exp_avg_sq_, l_self_state_list_l_self_state_keys_5_exp_avg_sq_, l_self_state_list_l_self_state_keys_6_exp_avg_sq_, l_self_state_list_l_self_state_keys_7_exp_avg_sq_, l_self_state_list_l_self_state_keys_8_exp_avg_sq_, l_self_state_list_l_self_state_keys_9_exp_avg_sq_, l_self_state_list_l_self_state_keys_10_exp_avg_sq_, l_self_state_list_l_self_state_keys_11_exp_avg_sq_, l_self_state_list_l_self_state_keys_12_exp_avg_sq_, l_self_state_list_l_self_state_keys_13_exp_avg_sq_, l_self_state_list_l_self_state_keys_14_exp_avg_sq_, l_self_state_list_l_self_state_keys_15_exp_avg_sq_, l_self_state_list_l_self_state_keys_16_exp_avg_sq_, l_self_state_list_l_self_state_keys_17_exp_avg_sq_, l_self_state_list_l_self_state_keys_18_exp_avg_sq_, l_self_state_list_l_self_state_keys_19_exp_avg_sq_, l_self_state_list_l_self_state_keys_20_exp_avg_sq_, l_self_state_list_l_self_state_keys_21_exp_avg_sq_, l_self_state_list_l_self_state_keys_22_exp_avg_sq_, l_self_state_list_l_self_state_keys_23_exp_avg_sq_, l_self_state_list_l_self_state_keys_24_exp_avg_sq_, l_self_state_list_l_self_state_keys_25_exp_avg_sq_, l_self_state_list_l_self_state_keys_26_exp_avg_sq_, l_self_state_list_l_self_state_keys_27_exp_avg_sq_, l_self_state_list_l_self_state_keys_28_exp_avg_sq_, l_self_state_list_l_self_state_keys_29_exp_avg_sq_, l_self_state_list_l_self_state_keys_30_exp_avg_sq_, l_self_state_list_l_self_state_keys_31_exp_avg_sq_, l_self_state_list_l_self_state_keys_32_exp_avg_sq_, l_self_state_list_l_self_state_keys_33_exp_avg_sq_, l_self_state_list_l_self_state_keys_34_exp_avg_sq_, l_self_state_list_l_self_state_keys_35_exp_avg_sq_, l_self_state_list_l_self_state_keys_36_exp_avg_sq_, l_self_state_list_l_self_state_keys_37_exp_avg_sq_, l_self_state_list_l_self_state_keys_38_exp_avg_sq_, l_self_state_list_l_self_state_keys_39_exp_avg_sq_, l_self_state_list_l_self_state_keys_40_exp_avg_sq_, l_self_state_list_l_self_state_keys_41_exp_avg_sq_, l_self_state_list_l_self_state_keys_42_exp_avg_sq_, l_self_state_list_l_self_state_keys_43_exp_avg_sq_, l_self_state_list_l_self_state_keys_44_exp_avg_sq_, l_self_state_list_l_self_state_keys_45_exp_avg_sq_, l_self_state_list_l_self_state_keys_46_exp_avg_sq_, l_self_state_list_l_self_state_keys_47_exp_avg_sq_, l_self_state_list_l_self_state_keys_48_exp_avg_sq_, l_self_state_list_l_self_state_keys_49_exp_avg_sq_, l_self_state_list_l_self_state_keys_50_exp_avg_sq_, l_self_state_list_l_self_state_keys_51_exp_avg_sq_, l_self_state_list_l_self_state_keys_52_exp_avg_sq_, l_self_state_list_l_self_state_keys_53_exp_avg_sq_, l_self_state_list_l_self_state_keys_54_exp_avg_sq_, l_self_state_list_l_self_state_keys_55_exp_avg_sq_, l_self_state_list_l_self_state_keys_56_exp_avg_sq_, l_self_state_list_l_self_state_keys_57_exp_avg_sq_, l_self_state_list_l_self_state_keys_58_exp_avg_sq_, l_self_state_list_l_self_state_keys_59_exp_avg_sq_, l_self_state_list_l_self_state_keys_60_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_61_exp_avg_sq_, l_self_state_list_l_self_state_keys_62_exp_avg_sq_, l_self_state_list_l_self_state_keys_63_exp_avg_sq_, l_self_state_list_l_self_state_keys_64_exp_avg_sq_, l_self_state_list_l_self_state_keys_65_exp_avg_sq_, l_self_state_list_l_self_state_keys_66_exp_avg_sq_, l_self_state_list_l_self_state_keys_67_exp_avg_sq_, l_self_state_list_l_self_state_keys_68_exp_avg_sq_, l_self_state_list_l_self_state_keys_69_exp_avg_sq_, l_self_state_list_l_self_state_keys_70_exp_avg_sq_, l_self_state_list_l_self_state_keys_71_exp_avg_sq_, l_self_state_list_l_self_state_keys_72_exp_avg_sq_, l_self_state_list_l_self_state_keys_73_exp_avg_sq_, l_self_state_list_l_self_state_keys_74_exp_avg_sq_, l_self_state_list_l_self_state_keys_75_exp_avg_sq_, l_self_state_list_l_self_state_keys_76_exp_avg_sq_, l_self_state_list_l_self_state_keys_77_exp_avg_sq_, l_self_state_list_l_self_state_keys_78_exp_avg_sq_, l_self_state_list_l_self_state_keys_79_exp_avg_sq_, l_self_state_list_l_self_state_keys_80_exp_avg_sq_, l_self_state_list_l_self_state_keys_81_exp_avg_sq_, l_self_state_list_l_self_state_keys_82_exp_avg_sq_, l_self_state_list_l_self_state_keys_83_exp_avg_sq_, l_self_state_list_l_self_state_keys_84_exp_avg_sq_, l_self_state_list_l_self_state_keys_85_exp_avg_sq_, l_self_state_list_l_self_state_keys_86_exp_avg_sq_, l_self_state_list_l_self_state_keys_87_exp_avg_sq_, l_self_state_list_l_self_state_keys_88_exp_avg_sq_, l_self_state_list_l_self_state_keys_89_exp_avg_sq_, l_self_state_list_l_self_state_keys_90_exp_avg_sq_, l_self_state_list_l_self_state_keys_91_exp_avg_sq_, l_self_state_list_l_self_state_keys_92_exp_avg_sq_, l_self_state_list_l_self_state_keys_93_exp_avg_sq_, l_self_state_list_l_self_state_keys_94_exp_avg_sq_, l_self_state_list_l_self_state_keys_95_exp_avg_sq_, l_self_state_list_l_self_state_keys_96_exp_avg_sq_, l_self_state_list_l_self_state_keys_97_exp_avg_sq_, l_self_state_list_l_self_state_keys_98_exp_avg_sq_, l_self_state_list_l_self_state_keys_99_exp_avg_sq_, l_self_state_list_l_self_state_keys_100_exp_avg_sq_, l_self_state_list_l_self_state_keys_101_exp_avg_sq_, l_self_state_list_l_self_state_keys_102_exp_avg_sq_, l_self_state_list_l_self_state_keys_103_exp_avg_sq_, l_self_state_list_l_self_state_keys_104_exp_avg_sq_, l_self_state_list_l_self_state_keys_105_exp_avg_sq_, l_self_state_list_l_self_state_keys_106_exp_avg_sq_, l_self_state_list_l_self_state_keys_107_exp_avg_sq_, l_self_state_list_l_self_state_keys_108_exp_avg_sq_, l_self_state_list_l_self_state_keys_109_exp_avg_sq_, l_self_state_list_l_self_state_keys_110_exp_avg_sq_, l_self_state_list_l_self_state_keys_111_exp_avg_sq_, l_self_state_list_l_self_state_keys_112_exp_avg_sq_, l_self_state_list_l_self_state_keys_113_exp_avg_sq_, l_self_state_list_l_self_state_keys_114_exp_avg_sq_, l_self_state_list_l_self_state_keys_115_exp_avg_sq_, l_self_state_list_l_self_state_keys_116_exp_avg_sq_, l_self_state_list_l_self_state_keys_117_exp_avg_sq_, l_self_state_list_l_self_state_keys_118_exp_avg_sq_, l_self_state_list_l_self_state_keys_119_exp_avg_sq_, l_self_state_list_l_self_state_keys_120_exp_avg_sq_, l_self_state_list_l_self_state_keys_121_exp_avg_sq_, l_self_state_list_l_self_state_keys_122_exp_avg_sq_, l_self_state_list_l_self_state_keys_123_exp_avg_sq_, l_self_state_list_l_self_state_keys_124_exp_avg_sq_, l_self_state_list_l_self_state_keys_125_exp_avg_sq_, l_self_state_list_l_self_state_keys_126_exp_avg_sq_, l_self_state_list_l_self_state_keys_127_exp_avg_sq_, 
l_self_state_list_l_self_state_keys_128_exp_avg_sq_, l_self_state_list_l_self_state_keys_129_exp_avg_sq_, l_self_state_list_l_self_state_keys_130_exp_avg_sq_, l_self_state_list_l_self_state_keys_131_exp_avg_sq_, l_self_state_list_l_self_state_keys_132_exp_avg_sq_, l_self_state_list_l_self_state_keys_133_exp_avg_sq_, l_self_state_list_l_self_state_keys_134_exp_avg_sq_, l_self_state_list_l_self_state_keys_135_exp_avg_sq_, l_self_state_list_l_self_state_keys_136_exp_avg_sq_, l_self_state_list_l_self_state_keys_137_exp_avg_sq_, l_self_state_list_l_self_state_keys_138_exp_avg_sq_, l_self_state_list_l_self_state_keys_139_exp_avg_sq_, l_self_state_list_l_self_state_keys_140_exp_avg_sq_, l_self_state_list_l_self_state_keys_141_exp_avg_sq_, l_self_state_list_l_self_state_keys_142_exp_avg_sq_, l_self_state_list_l_self_state_keys_143_exp_avg_sq_, l_self_state_list_l_self_state_keys_144_exp_avg_sq_, l_self_state_list_l_self_state_keys_145_exp_avg_sq_, l_self_state_list_l_self_state_keys_146_exp_avg_sq_, l_self_state_list_l_self_state_keys_147_exp_avg_sq_]); l_self_state_list_l_self_state_keys_0_exp_avg_sq_ = l_self_state_list_l_self_state_keys_1_exp_avg_sq_ = l_self_state_list_l_self_state_keys_2_exp_avg_sq_ = l_self_state_list_l_self_state_keys_3_exp_avg_sq_ = l_self_state_list_l_self_state_keys_4_exp_avg_sq_ = l_self_state_list_l_self_state_keys_5_exp_avg_sq_ = l_self_state_list_l_self_state_keys_6_exp_avg_sq_ = l_self_state_list_l_self_state_keys_7_exp_avg_sq_ = l_self_state_list_l_self_state_keys_8_exp_avg_sq_ = l_self_state_list_l_self_state_keys_9_exp_avg_sq_ = l_self_state_list_l_self_state_keys_10_exp_avg_sq_ = l_self_state_list_l_self_state_keys_11_exp_avg_sq_ = l_self_state_list_l_self_state_keys_12_exp_avg_sq_ = l_self_state_list_l_self_state_keys_13_exp_avg_sq_ = l_self_state_list_l_self_state_keys_14_exp_avg_sq_ = l_self_state_list_l_self_state_keys_15_exp_avg_sq_ = l_self_state_list_l_self_state_keys_16_exp_avg_sq_ = l_self_state_list_l_self_state_keys_17_exp_avg_sq_ = l_self_state_list_l_self_state_keys_18_exp_avg_sq_ = l_self_state_list_l_self_state_keys_19_exp_avg_sq_ = l_self_state_list_l_self_state_keys_20_exp_avg_sq_ = l_self_state_list_l_self_state_keys_21_exp_avg_sq_ = l_self_state_list_l_self_state_keys_22_exp_avg_sq_ = l_self_state_list_l_self_state_keys_23_exp_avg_sq_ = l_self_state_list_l_self_state_keys_24_exp_avg_sq_ = l_self_state_list_l_self_state_keys_25_exp_avg_sq_ = l_self_state_list_l_self_state_keys_26_exp_avg_sq_ = l_self_state_list_l_self_state_keys_27_exp_avg_sq_ = l_self_state_list_l_self_state_keys_28_exp_avg_sq_ = l_self_state_list_l_self_state_keys_29_exp_avg_sq_ = l_self_state_list_l_self_state_keys_30_exp_avg_sq_ = l_self_state_list_l_self_state_keys_31_exp_avg_sq_ = l_self_state_list_l_self_state_keys_32_exp_avg_sq_ = l_self_state_list_l_self_state_keys_33_exp_avg_sq_ = l_self_state_list_l_self_state_keys_34_exp_avg_sq_ = l_self_state_list_l_self_state_keys_35_exp_avg_sq_ = l_self_state_list_l_self_state_keys_36_exp_avg_sq_ = l_self_state_list_l_self_state_keys_37_exp_avg_sq_ = l_self_state_list_l_self_state_keys_38_exp_avg_sq_ = l_self_state_list_l_self_state_keys_39_exp_avg_sq_ = l_self_state_list_l_self_state_keys_40_exp_avg_sq_ = l_self_state_list_l_self_state_keys_41_exp_avg_sq_ = l_self_state_list_l_self_state_keys_42_exp_avg_sq_ = l_self_state_list_l_self_state_keys_43_exp_avg_sq_ = l_self_state_list_l_self_state_keys_44_exp_avg_sq_ = l_self_state_list_l_self_state_keys_45_exp_avg_sq_ = l_self_state_list_l_self_state_keys_46_exp_avg_sq_ = 
l_self_state_list_l_self_state_keys_47_exp_avg_sq_ = l_self_state_list_l_self_state_keys_48_exp_avg_sq_ = l_self_state_list_l_self_state_keys_49_exp_avg_sq_ = l_self_state_list_l_self_state_keys_50_exp_avg_sq_ = l_self_state_list_l_self_state_keys_51_exp_avg_sq_ = l_self_state_list_l_self_state_keys_52_exp_avg_sq_ = l_self_state_list_l_self_state_keys_53_exp_avg_sq_ = l_self_state_list_l_self_state_keys_54_exp_avg_sq_ = l_self_state_list_l_self_state_keys_55_exp_avg_sq_ = l_self_state_list_l_self_state_keys_56_exp_avg_sq_ = l_self_state_list_l_self_state_keys_57_exp_avg_sq_ = l_self_state_list_l_self_state_keys_58_exp_avg_sq_ = l_self_state_list_l_self_state_keys_59_exp_avg_sq_ = l_self_state_list_l_self_state_keys_60_exp_avg_sq_ = l_self_state_list_l_self_state_keys_61_exp_avg_sq_ = l_self_state_list_l_self_state_keys_62_exp_avg_sq_ = l_self_state_list_l_self_state_keys_63_exp_avg_sq_ = l_self_state_list_l_self_state_keys_64_exp_avg_sq_ = l_self_state_list_l_self_state_keys_65_exp_avg_sq_ = l_self_state_list_l_self_state_keys_66_exp_avg_sq_ = l_self_state_list_l_self_state_keys_67_exp_avg_sq_ = l_self_state_list_l_self_state_keys_68_exp_avg_sq_ = l_self_state_list_l_self_state_keys_69_exp_avg_sq_ = l_self_state_list_l_self_state_keys_70_exp_avg_sq_ = l_self_state_list_l_self_state_keys_71_exp_avg_sq_ = l_self_state_list_l_self_state_keys_72_exp_avg_sq_ = l_self_state_list_l_self_state_keys_73_exp_avg_sq_ = l_self_state_list_l_self_state_keys_74_exp_avg_sq_ = l_self_state_list_l_self_state_keys_75_exp_avg_sq_ = l_self_state_list_l_self_state_keys_76_exp_avg_sq_ = l_self_state_list_l_self_state_keys_77_exp_avg_sq_ = l_self_state_list_l_self_state_keys_78_exp_avg_sq_ = l_self_state_list_l_self_state_keys_79_exp_avg_sq_ = l_self_state_list_l_self_state_keys_80_exp_avg_sq_ = l_self_state_list_l_self_state_keys_81_exp_avg_sq_ = l_self_state_list_l_self_state_keys_82_exp_avg_sq_ = l_self_state_list_l_self_state_keys_83_exp_avg_sq_ = l_self_state_list_l_self_state_keys_84_exp_avg_sq_ = l_self_state_list_l_self_state_keys_85_exp_avg_sq_ = l_self_state_list_l_self_state_keys_86_exp_avg_sq_ = l_self_state_list_l_self_state_keys_87_exp_avg_sq_ = l_self_state_list_l_self_state_keys_88_exp_avg_sq_ = l_self_state_list_l_self_state_keys_89_exp_avg_sq_ = l_self_state_list_l_self_state_keys_90_exp_avg_sq_ = l_self_state_list_l_self_state_keys_91_exp_avg_sq_ = l_self_state_list_l_self_state_keys_92_exp_avg_sq_ = l_self_state_list_l_self_state_keys_93_exp_avg_sq_ = l_self_state_list_l_self_state_keys_94_exp_avg_sq_ = l_self_state_list_l_self_state_keys_95_exp_avg_sq_ = l_self_state_list_l_self_state_keys_96_exp_avg_sq_ = l_self_state_list_l_self_state_keys_97_exp_avg_sq_ = l_self_state_list_l_self_state_keys_98_exp_avg_sq_ = l_self_state_list_l_self_state_keys_99_exp_avg_sq_ = l_self_state_list_l_self_state_keys_100_exp_avg_sq_ = l_self_state_list_l_self_state_keys_101_exp_avg_sq_ = l_self_state_list_l_self_state_keys_102_exp_avg_sq_ = l_self_state_list_l_self_state_keys_103_exp_avg_sq_ = l_self_state_list_l_self_state_keys_104_exp_avg_sq_ = l_self_state_list_l_self_state_keys_105_exp_avg_sq_ = l_self_state_list_l_self_state_keys_106_exp_avg_sq_ = l_self_state_list_l_self_state_keys_107_exp_avg_sq_ = l_self_state_list_l_self_state_keys_108_exp_avg_sq_ = l_self_state_list_l_self_state_keys_109_exp_avg_sq_ = l_self_state_list_l_self_state_keys_110_exp_avg_sq_ = l_self_state_list_l_self_state_keys_111_exp_avg_sq_ = l_self_state_list_l_self_state_keys_112_exp_avg_sq_ = 
l_self_state_list_l_self_state_keys_113_exp_avg_sq_ = l_self_state_list_l_self_state_keys_114_exp_avg_sq_ = l_self_state_list_l_self_state_keys_115_exp_avg_sq_ = l_self_state_list_l_self_state_keys_116_exp_avg_sq_ = l_self_state_list_l_self_state_keys_117_exp_avg_sq_ = l_self_state_list_l_self_state_keys_118_exp_avg_sq_ = l_self_state_list_l_self_state_keys_119_exp_avg_sq_ = l_self_state_list_l_self_state_keys_120_exp_avg_sq_ = l_self_state_list_l_self_state_keys_121_exp_avg_sq_ = l_self_state_list_l_self_state_keys_122_exp_avg_sq_ = l_self_state_list_l_self_state_keys_123_exp_avg_sq_ = l_self_state_list_l_self_state_keys_124_exp_avg_sq_ = l_self_state_list_l_self_state_keys_125_exp_avg_sq_ = l_self_state_list_l_self_state_keys_126_exp_avg_sq_ = l_self_state_list_l_self_state_keys_127_exp_avg_sq_ = l_self_state_list_l_self_state_keys_128_exp_avg_sq_ = l_self_state_list_l_self_state_keys_129_exp_avg_sq_ = l_self_state_list_l_self_state_keys_130_exp_avg_sq_ = l_self_state_list_l_self_state_keys_131_exp_avg_sq_ = l_self_state_list_l_self_state_keys_132_exp_avg_sq_ = l_self_state_list_l_self_state_keys_133_exp_avg_sq_ = l_self_state_list_l_self_state_keys_134_exp_avg_sq_ = l_self_state_list_l_self_state_keys_135_exp_avg_sq_ = l_self_state_list_l_self_state_keys_136_exp_avg_sq_ = l_self_state_list_l_self_state_keys_137_exp_avg_sq_ = l_self_state_list_l_self_state_keys_138_exp_avg_sq_ = l_self_state_list_l_self_state_keys_139_exp_avg_sq_ = l_self_state_list_l_self_state_keys_140_exp_avg_sq_ = l_self_state_list_l_self_state_keys_141_exp_avg_sq_ = l_self_state_list_l_self_state_keys_142_exp_avg_sq_ = l_self_state_list_l_self_state_keys_143_exp_avg_sq_ = l_self_state_list_l_self_state_keys_144_exp_avg_sq_ = l_self_state_list_l_self_state_keys_145_exp_avg_sq_ = l_self_state_list_l_self_state_keys_146_exp_avg_sq_ = l_self_state_list_l_self_state_keys_147_exp_avg_sq_ = None
 getitem_1776: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt[0]
 getitem_1777: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt[1]
 getitem_1778: "f32[768][1]cuda:0" = _foreach_sqrt[2]
 getitem_1779: "f32[768][1]cuda:0" = _foreach_sqrt[3]
 getitem_1780: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[4]
 getitem_1781: "f32[2304][1]cuda:0" = _foreach_sqrt[5]
 getitem_1782: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[6]
 getitem_1783: "f32[768][1]cuda:0" = _foreach_sqrt[7]
 getitem_1784: "f32[768][1]cuda:0" = _foreach_sqrt[8]
 getitem_1785: "f32[768][1]cuda:0" = _foreach_sqrt[9]
 getitem_1786: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[10]
 getitem_1787: "f32[3072][1]cuda:0" = _foreach_sqrt[11]
 getitem_1788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[12]
 getitem_1789: "f32[768][1]cuda:0" = _foreach_sqrt[13]
 getitem_1790: "f32[768][1]cuda:0" = _foreach_sqrt[14]
 getitem_1791: "f32[768][1]cuda:0" = _foreach_sqrt[15]
 getitem_1792: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[16]
 getitem_1793: "f32[2304][1]cuda:0" = _foreach_sqrt[17]
 getitem_1794: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[18]
 getitem_1795: "f32[768][1]cuda:0" = _foreach_sqrt[19]
 getitem_1796: "f32[768][1]cuda:0" = _foreach_sqrt[20]
 getitem_1797: "f32[768][1]cuda:0" = _foreach_sqrt[21]
 getitem_1798: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[22]
 getitem_1799: "f32[3072][1]cuda:0" = _foreach_sqrt[23]
 getitem_1800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[24]
 getitem_1801: "f32[768][1]cuda:0" = _foreach_sqrt[25]
 getitem_1802: "f32[768][1]cuda:0" = _foreach_sqrt[26]
 getitem_1803: "f32[768][1]cuda:0" = _foreach_sqrt[27]
 getitem_1804: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[28]
 getitem_1805: "f32[2304][1]cuda:0" = _foreach_sqrt[29]
 getitem_1806: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[30]
 getitem_1807: "f32[768][1]cuda:0" = _foreach_sqrt[31]
 getitem_1808: "f32[768][1]cuda:0" = _foreach_sqrt[32]
 getitem_1809: "f32[768][1]cuda:0" = _foreach_sqrt[33]
 getitem_1810: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[34]
 getitem_1811: "f32[3072][1]cuda:0" = _foreach_sqrt[35]
 getitem_1812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[36]
 getitem_1813: "f32[768][1]cuda:0" = _foreach_sqrt[37]
 getitem_1814: "f32[768][1]cuda:0" = _foreach_sqrt[38]
 getitem_1815: "f32[768][1]cuda:0" = _foreach_sqrt[39]
 getitem_1816: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[40]
 getitem_1817: "f32[2304][1]cuda:0" = _foreach_sqrt[41]
 getitem_1818: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[42]
 getitem_1819: "f32[768][1]cuda:0" = _foreach_sqrt[43]
 getitem_1820: "f32[768][1]cuda:0" = _foreach_sqrt[44]
 getitem_1821: "f32[768][1]cuda:0" = _foreach_sqrt[45]
 getitem_1822: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[46]
 getitem_1823: "f32[3072][1]cuda:0" = _foreach_sqrt[47]
 getitem_1824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[48]
 getitem_1825: "f32[768][1]cuda:0" = _foreach_sqrt[49]
 getitem_1826: "f32[768][1]cuda:0" = _foreach_sqrt[50]
 getitem_1827: "f32[768][1]cuda:0" = _foreach_sqrt[51]
 getitem_1828: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[52]
 getitem_1829: "f32[2304][1]cuda:0" = _foreach_sqrt[53]
 getitem_1830: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[54]
 getitem_1831: "f32[768][1]cuda:0" = _foreach_sqrt[55]
 getitem_1832: "f32[768][1]cuda:0" = _foreach_sqrt[56]
 getitem_1833: "f32[768][1]cuda:0" = _foreach_sqrt[57]
 getitem_1834: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[58]
 getitem_1835: "f32[3072][1]cuda:0" = _foreach_sqrt[59]
 getitem_1836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[60]
 getitem_1837: "f32[768][1]cuda:0" = _foreach_sqrt[61]
 getitem_1838: "f32[768][1]cuda:0" = _foreach_sqrt[62]
 getitem_1839: "f32[768][1]cuda:0" = _foreach_sqrt[63]
 getitem_1840: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[64]
 getitem_1841: "f32[2304][1]cuda:0" = _foreach_sqrt[65]
 getitem_1842: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[66]
 getitem_1843: "f32[768][1]cuda:0" = _foreach_sqrt[67]
 getitem_1844: "f32[768][1]cuda:0" = _foreach_sqrt[68]
 getitem_1845: "f32[768][1]cuda:0" = _foreach_sqrt[69]
 getitem_1846: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[70]
 getitem_1847: "f32[3072][1]cuda:0" = _foreach_sqrt[71]
 getitem_1848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[72]
 getitem_1849: "f32[768][1]cuda:0" = _foreach_sqrt[73]
 getitem_1850: "f32[768][1]cuda:0" = _foreach_sqrt[74]
 getitem_1851: "f32[768][1]cuda:0" = _foreach_sqrt[75]
 getitem_1852: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[76]
 getitem_1853: "f32[2304][1]cuda:0" = _foreach_sqrt[77]
 getitem_1854: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[78]
 getitem_1855: "f32[768][1]cuda:0" = _foreach_sqrt[79]
 getitem_1856: "f32[768][1]cuda:0" = _foreach_sqrt[80]
 getitem_1857: "f32[768][1]cuda:0" = _foreach_sqrt[81]
 getitem_1858: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[82]
 getitem_1859: "f32[3072][1]cuda:0" = _foreach_sqrt[83]
 getitem_1860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[84]
 getitem_1861: "f32[768][1]cuda:0" = _foreach_sqrt[85]
 getitem_1862: "f32[768][1]cuda:0" = _foreach_sqrt[86]
 getitem_1863: "f32[768][1]cuda:0" = _foreach_sqrt[87]
 getitem_1864: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[88]
 getitem_1865: "f32[2304][1]cuda:0" = _foreach_sqrt[89]
 getitem_1866: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[90]
 getitem_1867: "f32[768][1]cuda:0" = _foreach_sqrt[91]
 getitem_1868: "f32[768][1]cuda:0" = _foreach_sqrt[92]
 getitem_1869: "f32[768][1]cuda:0" = _foreach_sqrt[93]
 getitem_1870: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[94]
 getitem_1871: "f32[3072][1]cuda:0" = _foreach_sqrt[95]
 getitem_1872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[96]
 getitem_1873: "f32[768][1]cuda:0" = _foreach_sqrt[97]
 getitem_1874: "f32[768][1]cuda:0" = _foreach_sqrt[98]
 getitem_1875: "f32[768][1]cuda:0" = _foreach_sqrt[99]
 getitem_1876: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[100]
 getitem_1877: "f32[2304][1]cuda:0" = _foreach_sqrt[101]
 getitem_1878: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[102]
 getitem_1879: "f32[768][1]cuda:0" = _foreach_sqrt[103]
 getitem_1880: "f32[768][1]cuda:0" = _foreach_sqrt[104]
 getitem_1881: "f32[768][1]cuda:0" = _foreach_sqrt[105]
 getitem_1882: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[106]
 getitem_1883: "f32[3072][1]cuda:0" = _foreach_sqrt[107]
 getitem_1884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[108]
 getitem_1885: "f32[768][1]cuda:0" = _foreach_sqrt[109]
 getitem_1886: "f32[768][1]cuda:0" = _foreach_sqrt[110]
 getitem_1887: "f32[768][1]cuda:0" = _foreach_sqrt[111]
 getitem_1888: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[112]
 getitem_1889: "f32[2304][1]cuda:0" = _foreach_sqrt[113]
 getitem_1890: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[114]
 getitem_1891: "f32[768][1]cuda:0" = _foreach_sqrt[115]
 getitem_1892: "f32[768][1]cuda:0" = _foreach_sqrt[116]
 getitem_1893: "f32[768][1]cuda:0" = _foreach_sqrt[117]
 getitem_1894: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[118]
 getitem_1895: "f32[3072][1]cuda:0" = _foreach_sqrt[119]
 getitem_1896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[120]
 getitem_1897: "f32[768][1]cuda:0" = _foreach_sqrt[121]
 getitem_1898: "f32[768][1]cuda:0" = _foreach_sqrt[122]
 getitem_1899: "f32[768][1]cuda:0" = _foreach_sqrt[123]
 getitem_1900: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[124]
 getitem_1901: "f32[2304][1]cuda:0" = _foreach_sqrt[125]
 getitem_1902: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[126]
 getitem_1903: "f32[768][1]cuda:0" = _foreach_sqrt[127]
 getitem_1904: "f32[768][1]cuda:0" = _foreach_sqrt[128]
 getitem_1905: "f32[768][1]cuda:0" = _foreach_sqrt[129]
 getitem_1906: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[130]
 getitem_1907: "f32[3072][1]cuda:0" = _foreach_sqrt[131]
 getitem_1908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[132]
 getitem_1909: "f32[768][1]cuda:0" = _foreach_sqrt[133]
 getitem_1910: "f32[768][1]cuda:0" = _foreach_sqrt[134]
 getitem_1911: "f32[768][1]cuda:0" = _foreach_sqrt[135]
 getitem_1912: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt[136]
 getitem_1913: "f32[2304][1]cuda:0" = _foreach_sqrt[137]
 getitem_1914: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt[138]
 getitem_1915: "f32[768][1]cuda:0" = _foreach_sqrt[139]
 getitem_1916: "f32[768][1]cuda:0" = _foreach_sqrt[140]
 getitem_1917: "f32[768][1]cuda:0" = _foreach_sqrt[141]
 getitem_1918: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt[142]
 getitem_1919: "f32[3072][1]cuda:0" = _foreach_sqrt[143]
 getitem_1920: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt[144]
 getitem_1921: "f32[768][1]cuda:0" = _foreach_sqrt[145]
 getitem_1922: "f32[768][1]cuda:0" = _foreach_sqrt[146]
 getitem_1923: "f32[768][1]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt)
 _foreach_div__1 = torch._foreach_div_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), (getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, 
getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887)); getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = _foreach_div__1 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps)
 _foreach_add__1 = torch._foreach_add_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, 
getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), 1e-08); _foreach_add__1 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size)
 _foreach_div__2 = torch._foreach_div_((getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, 
getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923), (getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739)); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = 
getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = _foreach_div__2 = None

 # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt)
 _foreach_addcdiv_ = torch._foreach_addcdiv_([l_self_param_groups_0_params_0_, l_self_param_groups_0_params_1_, l_self_param_groups_0_params_2_, l_self_param_groups_0_params_3_, l_self_param_groups_0_params_4_, l_self_param_groups_0_params_5_, l_self_param_groups_0_params_6_, l_self_param_groups_0_params_7_, l_self_param_groups_0_params_8_, l_self_param_groups_0_params_9_, l_self_param_groups_0_params_10_, l_self_param_groups_0_params_11_, l_self_param_groups_0_params_12_, l_self_param_groups_0_params_13_, l_self_param_groups_0_params_14_, l_self_param_groups_0_params_15_, l_self_param_groups_0_params_16_, l_self_param_groups_0_params_17_, l_self_param_groups_0_params_18_, l_self_param_groups_0_params_19_, l_self_param_groups_0_params_20_, l_self_param_groups_0_params_21_, l_self_param_groups_0_params_22_, l_self_param_groups_0_params_23_, l_self_param_groups_0_params_24_, l_self_param_groups_0_params_25_, l_self_param_groups_0_params_26_, l_self_param_groups_0_params_27_, l_self_param_groups_0_params_28_, l_self_param_groups_0_params_29_, l_self_param_groups_0_params_30_, l_self_param_groups_0_params_31_, l_self_param_groups_0_params_32_, l_self_param_groups_0_params_33_, l_self_param_groups_0_params_34_, l_self_param_groups_0_params_35_, l_self_param_groups_0_params_36_, l_self_param_groups_0_params_37_, l_self_param_groups_0_params_38_, l_self_param_groups_0_params_39_, l_self_param_groups_0_params_40_, l_self_param_groups_0_params_41_, l_self_param_groups_0_params_42_, l_self_param_groups_0_params_43_, l_self_param_groups_0_params_44_, l_self_param_groups_0_params_45_, l_self_param_groups_0_params_46_, l_self_param_groups_0_params_47_, l_self_param_groups_0_params_48_, l_self_param_groups_0_params_49_, l_self_param_groups_0_params_50_, l_self_param_groups_0_params_51_, l_self_param_groups_0_params_52_, l_self_param_groups_0_params_53_, l_self_param_groups_0_params_54_, l_self_param_groups_0_params_55_, l_self_param_groups_0_params_56_, l_self_param_groups_0_params_57_, l_self_param_groups_0_params_58_, l_self_param_groups_0_params_59_, l_self_param_groups_0_params_60_, l_self_param_groups_0_params_61_, l_self_param_groups_0_params_62_, l_self_param_groups_0_params_63_, l_self_param_groups_0_params_64_, l_self_param_groups_0_params_65_, l_self_param_groups_0_params_66_, l_self_param_groups_0_params_67_, l_self_param_groups_0_params_68_, l_self_param_groups_0_params_69_, l_self_param_groups_0_params_70_, l_self_param_groups_0_params_71_, l_self_param_groups_0_params_72_, l_self_param_groups_0_params_73_, 
l_self_param_groups_0_params_74_, l_self_param_groups_0_params_75_, l_self_param_groups_0_params_76_, l_self_param_groups_0_params_77_, l_self_param_groups_0_params_78_, l_self_param_groups_0_params_79_, l_self_param_groups_0_params_80_, l_self_param_groups_0_params_81_, l_self_param_groups_0_params_82_, l_self_param_groups_0_params_83_, l_self_param_groups_0_params_84_, l_self_param_groups_0_params_85_, l_self_param_groups_0_params_86_, l_self_param_groups_0_params_87_, l_self_param_groups_0_params_88_, l_self_param_groups_0_params_89_, l_self_param_groups_0_params_90_, l_self_param_groups_0_params_91_, l_self_param_groups_0_params_92_, l_self_param_groups_0_params_93_, l_self_param_groups_0_params_94_, l_self_param_groups_0_params_95_, l_self_param_groups_0_params_96_, l_self_param_groups_0_params_97_, l_self_param_groups_0_params_98_, l_self_param_groups_0_params_99_, l_self_param_groups_0_params_100_, l_self_param_groups_0_params_101_, l_self_param_groups_0_params_102_, l_self_param_groups_0_params_103_, l_self_param_groups_0_params_104_, l_self_param_groups_0_params_105_, l_self_param_groups_0_params_106_, l_self_param_groups_0_params_107_, l_self_param_groups_0_params_108_, l_self_param_groups_0_params_109_, l_self_param_groups_0_params_110_, l_self_param_groups_0_params_111_, l_self_param_groups_0_params_112_, l_self_param_groups_0_params_113_, l_self_param_groups_0_params_114_, l_self_param_groups_0_params_115_, l_self_param_groups_0_params_116_, l_self_param_groups_0_params_117_, l_self_param_groups_0_params_118_, l_self_param_groups_0_params_119_, l_self_param_groups_0_params_120_, l_self_param_groups_0_params_121_, l_self_param_groups_0_params_122_, l_self_param_groups_0_params_123_, l_self_param_groups_0_params_124_, l_self_param_groups_0_params_125_, l_self_param_groups_0_params_126_, l_self_param_groups_0_params_127_, l_self_param_groups_0_params_128_, l_self_param_groups_0_params_129_, l_self_param_groups_0_params_130_, l_self_param_groups_0_params_131_, l_self_param_groups_0_params_132_, l_self_param_groups_0_params_133_, l_self_param_groups_0_params_134_, l_self_param_groups_0_params_135_, l_self_param_groups_0_params_136_, l_self_param_groups_0_params_137_, l_self_param_groups_0_params_138_, l_self_param_groups_0_params_139_, l_self_param_groups_0_params_140_, l_self_param_groups_0_params_141_, l_self_param_groups_0_params_142_, l_self_param_groups_0_params_143_, l_self_param_groups_0_params_144_, l_self_param_groups_0_params_145_, l_self_param_groups_0_params_146_, l_self_param_groups_0_params_147_], [l_self_state_list_l_self_state_keys_0_exp_avg_, l_self_state_list_l_self_state_keys_1_exp_avg_, l_self_state_list_l_self_state_keys_2_exp_avg_, l_self_state_list_l_self_state_keys_3_exp_avg_, l_self_state_list_l_self_state_keys_4_exp_avg_, l_self_state_list_l_self_state_keys_5_exp_avg_, l_self_state_list_l_self_state_keys_6_exp_avg_, l_self_state_list_l_self_state_keys_7_exp_avg_, l_self_state_list_l_self_state_keys_8_exp_avg_, l_self_state_list_l_self_state_keys_9_exp_avg_, l_self_state_list_l_self_state_keys_10_exp_avg_, l_self_state_list_l_self_state_keys_11_exp_avg_, l_self_state_list_l_self_state_keys_12_exp_avg_, l_self_state_list_l_self_state_keys_13_exp_avg_, l_self_state_list_l_self_state_keys_14_exp_avg_, l_self_state_list_l_self_state_keys_15_exp_avg_, l_self_state_list_l_self_state_keys_16_exp_avg_, l_self_state_list_l_self_state_keys_17_exp_avg_, l_self_state_list_l_self_state_keys_18_exp_avg_, l_self_state_list_l_self_state_keys_19_exp_avg_, 
l_self_state_list_l_self_state_keys_20_exp_avg_, l_self_state_list_l_self_state_keys_21_exp_avg_, l_self_state_list_l_self_state_keys_22_exp_avg_, l_self_state_list_l_self_state_keys_23_exp_avg_, l_self_state_list_l_self_state_keys_24_exp_avg_, l_self_state_list_l_self_state_keys_25_exp_avg_, l_self_state_list_l_self_state_keys_26_exp_avg_, l_self_state_list_l_self_state_keys_27_exp_avg_, l_self_state_list_l_self_state_keys_28_exp_avg_, l_self_state_list_l_self_state_keys_29_exp_avg_, l_self_state_list_l_self_state_keys_30_exp_avg_, l_self_state_list_l_self_state_keys_31_exp_avg_, l_self_state_list_l_self_state_keys_32_exp_avg_, l_self_state_list_l_self_state_keys_33_exp_avg_, l_self_state_list_l_self_state_keys_34_exp_avg_, l_self_state_list_l_self_state_keys_35_exp_avg_, l_self_state_list_l_self_state_keys_36_exp_avg_, l_self_state_list_l_self_state_keys_37_exp_avg_, l_self_state_list_l_self_state_keys_38_exp_avg_, l_self_state_list_l_self_state_keys_39_exp_avg_, l_self_state_list_l_self_state_keys_40_exp_avg_, l_self_state_list_l_self_state_keys_41_exp_avg_, l_self_state_list_l_self_state_keys_42_exp_avg_, l_self_state_list_l_self_state_keys_43_exp_avg_, l_self_state_list_l_self_state_keys_44_exp_avg_, l_self_state_list_l_self_state_keys_45_exp_avg_, l_self_state_list_l_self_state_keys_46_exp_avg_, l_self_state_list_l_self_state_keys_47_exp_avg_, l_self_state_list_l_self_state_keys_48_exp_avg_, l_self_state_list_l_self_state_keys_49_exp_avg_, l_self_state_list_l_self_state_keys_50_exp_avg_, l_self_state_list_l_self_state_keys_51_exp_avg_, l_self_state_list_l_self_state_keys_52_exp_avg_, l_self_state_list_l_self_state_keys_53_exp_avg_, l_self_state_list_l_self_state_keys_54_exp_avg_, l_self_state_list_l_self_state_keys_55_exp_avg_, l_self_state_list_l_self_state_keys_56_exp_avg_, l_self_state_list_l_self_state_keys_57_exp_avg_, l_self_state_list_l_self_state_keys_58_exp_avg_, l_self_state_list_l_self_state_keys_59_exp_avg_, l_self_state_list_l_self_state_keys_60_exp_avg_, l_self_state_list_l_self_state_keys_61_exp_avg_, l_self_state_list_l_self_state_keys_62_exp_avg_, l_self_state_list_l_self_state_keys_63_exp_avg_, l_self_state_list_l_self_state_keys_64_exp_avg_, l_self_state_list_l_self_state_keys_65_exp_avg_, l_self_state_list_l_self_state_keys_66_exp_avg_, l_self_state_list_l_self_state_keys_67_exp_avg_, l_self_state_list_l_self_state_keys_68_exp_avg_, l_self_state_list_l_self_state_keys_69_exp_avg_, l_self_state_list_l_self_state_keys_70_exp_avg_, l_self_state_list_l_self_state_keys_71_exp_avg_, l_self_state_list_l_self_state_keys_72_exp_avg_, l_self_state_list_l_self_state_keys_73_exp_avg_, l_self_state_list_l_self_state_keys_74_exp_avg_, l_self_state_list_l_self_state_keys_75_exp_avg_, l_self_state_list_l_self_state_keys_76_exp_avg_, l_self_state_list_l_self_state_keys_77_exp_avg_, l_self_state_list_l_self_state_keys_78_exp_avg_, l_self_state_list_l_self_state_keys_79_exp_avg_, l_self_state_list_l_self_state_keys_80_exp_avg_, l_self_state_list_l_self_state_keys_81_exp_avg_, l_self_state_list_l_self_state_keys_82_exp_avg_, l_self_state_list_l_self_state_keys_83_exp_avg_, l_self_state_list_l_self_state_keys_84_exp_avg_, l_self_state_list_l_self_state_keys_85_exp_avg_, l_self_state_list_l_self_state_keys_86_exp_avg_, l_self_state_list_l_self_state_keys_87_exp_avg_, l_self_state_list_l_self_state_keys_88_exp_avg_, l_self_state_list_l_self_state_keys_89_exp_avg_, l_self_state_list_l_self_state_keys_90_exp_avg_, l_self_state_list_l_self_state_keys_91_exp_avg_, 
l_self_state_list_l_self_state_keys_92_exp_avg_, l_self_state_list_l_self_state_keys_93_exp_avg_, l_self_state_list_l_self_state_keys_94_exp_avg_, l_self_state_list_l_self_state_keys_95_exp_avg_, l_self_state_list_l_self_state_keys_96_exp_avg_, l_self_state_list_l_self_state_keys_97_exp_avg_, l_self_state_list_l_self_state_keys_98_exp_avg_, l_self_state_list_l_self_state_keys_99_exp_avg_, l_self_state_list_l_self_state_keys_100_exp_avg_, l_self_state_list_l_self_state_keys_101_exp_avg_, l_self_state_list_l_self_state_keys_102_exp_avg_, l_self_state_list_l_self_state_keys_103_exp_avg_, l_self_state_list_l_self_state_keys_104_exp_avg_, l_self_state_list_l_self_state_keys_105_exp_avg_, l_self_state_list_l_self_state_keys_106_exp_avg_, l_self_state_list_l_self_state_keys_107_exp_avg_, l_self_state_list_l_self_state_keys_108_exp_avg_, l_self_state_list_l_self_state_keys_109_exp_avg_, l_self_state_list_l_self_state_keys_110_exp_avg_, l_self_state_list_l_self_state_keys_111_exp_avg_, l_self_state_list_l_self_state_keys_112_exp_avg_, l_self_state_list_l_self_state_keys_113_exp_avg_, l_self_state_list_l_self_state_keys_114_exp_avg_, l_self_state_list_l_self_state_keys_115_exp_avg_, l_self_state_list_l_self_state_keys_116_exp_avg_, l_self_state_list_l_self_state_keys_117_exp_avg_, l_self_state_list_l_self_state_keys_118_exp_avg_, l_self_state_list_l_self_state_keys_119_exp_avg_, l_self_state_list_l_self_state_keys_120_exp_avg_, l_self_state_list_l_self_state_keys_121_exp_avg_, l_self_state_list_l_self_state_keys_122_exp_avg_, l_self_state_list_l_self_state_keys_123_exp_avg_, l_self_state_list_l_self_state_keys_124_exp_avg_, l_self_state_list_l_self_state_keys_125_exp_avg_, l_self_state_list_l_self_state_keys_126_exp_avg_, l_self_state_list_l_self_state_keys_127_exp_avg_, l_self_state_list_l_self_state_keys_128_exp_avg_, l_self_state_list_l_self_state_keys_129_exp_avg_, l_self_state_list_l_self_state_keys_130_exp_avg_, l_self_state_list_l_self_state_keys_131_exp_avg_, l_self_state_list_l_self_state_keys_132_exp_avg_, l_self_state_list_l_self_state_keys_133_exp_avg_, l_self_state_list_l_self_state_keys_134_exp_avg_, l_self_state_list_l_self_state_keys_135_exp_avg_, l_self_state_list_l_self_state_keys_136_exp_avg_, l_self_state_list_l_self_state_keys_137_exp_avg_, l_self_state_list_l_self_state_keys_138_exp_avg_, l_self_state_list_l_self_state_keys_139_exp_avg_, l_self_state_list_l_self_state_keys_140_exp_avg_, l_self_state_list_l_self_state_keys_141_exp_avg_, l_self_state_list_l_self_state_keys_142_exp_avg_, l_self_state_list_l_self_state_keys_143_exp_avg_, l_self_state_list_l_self_state_keys_144_exp_avg_, l_self_state_list_l_self_state_keys_145_exp_avg_, l_self_state_list_l_self_state_keys_146_exp_avg_, l_self_state_list_l_self_state_keys_147_exp_avg_], (getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, 
getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923)); l_self_param_groups_0_params_0_ = l_self_param_groups_0_params_1_ = l_self_param_groups_0_params_2_ = l_self_param_groups_0_params_3_ = l_self_param_groups_0_params_4_ = l_self_param_groups_0_params_5_ = l_self_param_groups_0_params_6_ = l_self_param_groups_0_params_7_ = l_self_param_groups_0_params_8_ = l_self_param_groups_0_params_9_ = l_self_param_groups_0_params_10_ = l_self_param_groups_0_params_11_ = l_self_param_groups_0_params_12_ = l_self_param_groups_0_params_13_ = l_self_param_groups_0_params_14_ = l_self_param_groups_0_params_15_ = l_self_param_groups_0_params_16_ = l_self_param_groups_0_params_17_ = l_self_param_groups_0_params_18_ = l_self_param_groups_0_params_19_ = l_self_param_groups_0_params_20_ = l_self_param_groups_0_params_21_ = l_self_param_groups_0_params_22_ = l_self_param_groups_0_params_23_ = l_self_param_groups_0_params_24_ = l_self_param_groups_0_params_25_ = l_self_param_groups_0_params_26_ = l_self_param_groups_0_params_27_ = l_self_param_groups_0_params_28_ = l_self_param_groups_0_params_29_ = l_self_param_groups_0_params_30_ = l_self_param_groups_0_params_31_ = l_self_param_groups_0_params_32_ = l_self_param_groups_0_params_33_ = l_self_param_groups_0_params_34_ = l_self_param_groups_0_params_35_ = l_self_param_groups_0_params_36_ = l_self_param_groups_0_params_37_ = l_self_param_groups_0_params_38_ = l_self_param_groups_0_params_39_ = l_self_param_groups_0_params_40_ = l_self_param_groups_0_params_41_ = l_self_param_groups_0_params_42_ = l_self_param_groups_0_params_43_ = l_self_param_groups_0_params_44_ = l_self_param_groups_0_params_45_ = l_self_param_groups_0_params_46_ = l_self_param_groups_0_params_47_ = l_self_param_groups_0_params_48_ = l_self_param_groups_0_params_49_ = l_self_param_groups_0_params_50_ = l_self_param_groups_0_params_51_ = l_self_param_groups_0_params_52_ = l_self_param_groups_0_params_53_ = l_self_param_groups_0_params_54_ = l_self_param_groups_0_params_55_ = l_self_param_groups_0_params_56_ = l_self_param_groups_0_params_57_ = l_self_param_groups_0_params_58_ = l_self_param_groups_0_params_59_ = l_self_param_groups_0_params_60_ = l_self_param_groups_0_params_61_ = l_self_param_groups_0_params_62_ = l_self_param_groups_0_params_63_ = 
l_self_param_groups_0_params_64_ = l_self_param_groups_0_params_65_ = l_self_param_groups_0_params_66_ = l_self_param_groups_0_params_67_ = l_self_param_groups_0_params_68_ = l_self_param_groups_0_params_69_ = l_self_param_groups_0_params_70_ = l_self_param_groups_0_params_71_ = l_self_param_groups_0_params_72_ = l_self_param_groups_0_params_73_ = l_self_param_groups_0_params_74_ = l_self_param_groups_0_params_75_ = l_self_param_groups_0_params_76_ = l_self_param_groups_0_params_77_ = l_self_param_groups_0_params_78_ = l_self_param_groups_0_params_79_ = l_self_param_groups_0_params_80_ = l_self_param_groups_0_params_81_ = l_self_param_groups_0_params_82_ = l_self_param_groups_0_params_83_ = l_self_param_groups_0_params_84_ = l_self_param_groups_0_params_85_ = l_self_param_groups_0_params_86_ = l_self_param_groups_0_params_87_ = l_self_param_groups_0_params_88_ = l_self_param_groups_0_params_89_ = l_self_param_groups_0_params_90_ = l_self_param_groups_0_params_91_ = l_self_param_groups_0_params_92_ = l_self_param_groups_0_params_93_ = l_self_param_groups_0_params_94_ = l_self_param_groups_0_params_95_ = l_self_param_groups_0_params_96_ = l_self_param_groups_0_params_97_ = l_self_param_groups_0_params_98_ = l_self_param_groups_0_params_99_ = l_self_param_groups_0_params_100_ = l_self_param_groups_0_params_101_ = l_self_param_groups_0_params_102_ = l_self_param_groups_0_params_103_ = l_self_param_groups_0_params_104_ = l_self_param_groups_0_params_105_ = l_self_param_groups_0_params_106_ = l_self_param_groups_0_params_107_ = l_self_param_groups_0_params_108_ = l_self_param_groups_0_params_109_ = l_self_param_groups_0_params_110_ = l_self_param_groups_0_params_111_ = l_self_param_groups_0_params_112_ = l_self_param_groups_0_params_113_ = l_self_param_groups_0_params_114_ = l_self_param_groups_0_params_115_ = l_self_param_groups_0_params_116_ = l_self_param_groups_0_params_117_ = l_self_param_groups_0_params_118_ = l_self_param_groups_0_params_119_ = l_self_param_groups_0_params_120_ = l_self_param_groups_0_params_121_ = l_self_param_groups_0_params_122_ = l_self_param_groups_0_params_123_ = l_self_param_groups_0_params_124_ = l_self_param_groups_0_params_125_ = l_self_param_groups_0_params_126_ = l_self_param_groups_0_params_127_ = l_self_param_groups_0_params_128_ = l_self_param_groups_0_params_129_ = l_self_param_groups_0_params_130_ = l_self_param_groups_0_params_131_ = l_self_param_groups_0_params_132_ = l_self_param_groups_0_params_133_ = l_self_param_groups_0_params_134_ = l_self_param_groups_0_params_135_ = l_self_param_groups_0_params_136_ = l_self_param_groups_0_params_137_ = l_self_param_groups_0_params_138_ = l_self_param_groups_0_params_139_ = l_self_param_groups_0_params_140_ = l_self_param_groups_0_params_141_ = l_self_param_groups_0_params_142_ = l_self_param_groups_0_params_143_ = l_self_param_groups_0_params_144_ = l_self_param_groups_0_params_145_ = l_self_param_groups_0_params_146_ = l_self_param_groups_0_params_147_ = l_self_state_list_l_self_state_keys_0_exp_avg_ = l_self_state_list_l_self_state_keys_1_exp_avg_ = l_self_state_list_l_self_state_keys_2_exp_avg_ = l_self_state_list_l_self_state_keys_3_exp_avg_ = l_self_state_list_l_self_state_keys_4_exp_avg_ = l_self_state_list_l_self_state_keys_5_exp_avg_ = l_self_state_list_l_self_state_keys_6_exp_avg_ = l_self_state_list_l_self_state_keys_7_exp_avg_ = l_self_state_list_l_self_state_keys_8_exp_avg_ = l_self_state_list_l_self_state_keys_9_exp_avg_ = l_self_state_list_l_self_state_keys_10_exp_avg_ = 
l_self_state_list_l_self_state_keys_11_exp_avg_ = l_self_state_list_l_self_state_keys_12_exp_avg_ = l_self_state_list_l_self_state_keys_13_exp_avg_ = l_self_state_list_l_self_state_keys_14_exp_avg_ = l_self_state_list_l_self_state_keys_15_exp_avg_ = l_self_state_list_l_self_state_keys_16_exp_avg_ = l_self_state_list_l_self_state_keys_17_exp_avg_ = l_self_state_list_l_self_state_keys_18_exp_avg_ = l_self_state_list_l_self_state_keys_19_exp_avg_ = l_self_state_list_l_self_state_keys_20_exp_avg_ = l_self_state_list_l_self_state_keys_21_exp_avg_ = l_self_state_list_l_self_state_keys_22_exp_avg_ = l_self_state_list_l_self_state_keys_23_exp_avg_ = l_self_state_list_l_self_state_keys_24_exp_avg_ = l_self_state_list_l_self_state_keys_25_exp_avg_ = l_self_state_list_l_self_state_keys_26_exp_avg_ = l_self_state_list_l_self_state_keys_27_exp_avg_ = l_self_state_list_l_self_state_keys_28_exp_avg_ = l_self_state_list_l_self_state_keys_29_exp_avg_ = l_self_state_list_l_self_state_keys_30_exp_avg_ = l_self_state_list_l_self_state_keys_31_exp_avg_ = l_self_state_list_l_self_state_keys_32_exp_avg_ = l_self_state_list_l_self_state_keys_33_exp_avg_ = l_self_state_list_l_self_state_keys_34_exp_avg_ = l_self_state_list_l_self_state_keys_35_exp_avg_ = l_self_state_list_l_self_state_keys_36_exp_avg_ = l_self_state_list_l_self_state_keys_37_exp_avg_ = l_self_state_list_l_self_state_keys_38_exp_avg_ = l_self_state_list_l_self_state_keys_39_exp_avg_ = l_self_state_list_l_self_state_keys_40_exp_avg_ = l_self_state_list_l_self_state_keys_41_exp_avg_ = l_self_state_list_l_self_state_keys_42_exp_avg_ = l_self_state_list_l_self_state_keys_43_exp_avg_ = l_self_state_list_l_self_state_keys_44_exp_avg_ = l_self_state_list_l_self_state_keys_45_exp_avg_ = l_self_state_list_l_self_state_keys_46_exp_avg_ = l_self_state_list_l_self_state_keys_47_exp_avg_ = l_self_state_list_l_self_state_keys_48_exp_avg_ = l_self_state_list_l_self_state_keys_49_exp_avg_ = l_self_state_list_l_self_state_keys_50_exp_avg_ = l_self_state_list_l_self_state_keys_51_exp_avg_ = l_self_state_list_l_self_state_keys_52_exp_avg_ = l_self_state_list_l_self_state_keys_53_exp_avg_ = l_self_state_list_l_self_state_keys_54_exp_avg_ = l_self_state_list_l_self_state_keys_55_exp_avg_ = l_self_state_list_l_self_state_keys_56_exp_avg_ = l_self_state_list_l_self_state_keys_57_exp_avg_ = l_self_state_list_l_self_state_keys_58_exp_avg_ = l_self_state_list_l_self_state_keys_59_exp_avg_ = l_self_state_list_l_self_state_keys_60_exp_avg_ = l_self_state_list_l_self_state_keys_61_exp_avg_ = l_self_state_list_l_self_state_keys_62_exp_avg_ = l_self_state_list_l_self_state_keys_63_exp_avg_ = l_self_state_list_l_self_state_keys_64_exp_avg_ = l_self_state_list_l_self_state_keys_65_exp_avg_ = l_self_state_list_l_self_state_keys_66_exp_avg_ = l_self_state_list_l_self_state_keys_67_exp_avg_ = l_self_state_list_l_self_state_keys_68_exp_avg_ = l_self_state_list_l_self_state_keys_69_exp_avg_ = l_self_state_list_l_self_state_keys_70_exp_avg_ = l_self_state_list_l_self_state_keys_71_exp_avg_ = l_self_state_list_l_self_state_keys_72_exp_avg_ = l_self_state_list_l_self_state_keys_73_exp_avg_ = l_self_state_list_l_self_state_keys_74_exp_avg_ = l_self_state_list_l_self_state_keys_75_exp_avg_ = l_self_state_list_l_self_state_keys_76_exp_avg_ = l_self_state_list_l_self_state_keys_77_exp_avg_ = l_self_state_list_l_self_state_keys_78_exp_avg_ = l_self_state_list_l_self_state_keys_79_exp_avg_ = l_self_state_list_l_self_state_keys_80_exp_avg_ = l_self_state_list_l_self_state_keys_81_exp_avg_ = 
l_self_state_list_l_self_state_keys_82_exp_avg_ = l_self_state_list_l_self_state_keys_83_exp_avg_ = l_self_state_list_l_self_state_keys_84_exp_avg_ = l_self_state_list_l_self_state_keys_85_exp_avg_ = l_self_state_list_l_self_state_keys_86_exp_avg_ = l_self_state_list_l_self_state_keys_87_exp_avg_ = l_self_state_list_l_self_state_keys_88_exp_avg_ = l_self_state_list_l_self_state_keys_89_exp_avg_ = l_self_state_list_l_self_state_keys_90_exp_avg_ = l_self_state_list_l_self_state_keys_91_exp_avg_ = l_self_state_list_l_self_state_keys_92_exp_avg_ = l_self_state_list_l_self_state_keys_93_exp_avg_ = l_self_state_list_l_self_state_keys_94_exp_avg_ = l_self_state_list_l_self_state_keys_95_exp_avg_ = l_self_state_list_l_self_state_keys_96_exp_avg_ = l_self_state_list_l_self_state_keys_97_exp_avg_ = l_self_state_list_l_self_state_keys_98_exp_avg_ = l_self_state_list_l_self_state_keys_99_exp_avg_ = l_self_state_list_l_self_state_keys_100_exp_avg_ = l_self_state_list_l_self_state_keys_101_exp_avg_ = l_self_state_list_l_self_state_keys_102_exp_avg_ = l_self_state_list_l_self_state_keys_103_exp_avg_ = l_self_state_list_l_self_state_keys_104_exp_avg_ = l_self_state_list_l_self_state_keys_105_exp_avg_ = l_self_state_list_l_self_state_keys_106_exp_avg_ = l_self_state_list_l_self_state_keys_107_exp_avg_ = l_self_state_list_l_self_state_keys_108_exp_avg_ = l_self_state_list_l_self_state_keys_109_exp_avg_ = l_self_state_list_l_self_state_keys_110_exp_avg_ = l_self_state_list_l_self_state_keys_111_exp_avg_ = l_self_state_list_l_self_state_keys_112_exp_avg_ = l_self_state_list_l_self_state_keys_113_exp_avg_ = l_self_state_list_l_self_state_keys_114_exp_avg_ = l_self_state_list_l_self_state_keys_115_exp_avg_ = l_self_state_list_l_self_state_keys_116_exp_avg_ = l_self_state_list_l_self_state_keys_117_exp_avg_ = l_self_state_list_l_self_state_keys_118_exp_avg_ = l_self_state_list_l_self_state_keys_119_exp_avg_ = l_self_state_list_l_self_state_keys_120_exp_avg_ = l_self_state_list_l_self_state_keys_121_exp_avg_ = l_self_state_list_l_self_state_keys_122_exp_avg_ = l_self_state_list_l_self_state_keys_123_exp_avg_ = l_self_state_list_l_self_state_keys_124_exp_avg_ = l_self_state_list_l_self_state_keys_125_exp_avg_ = l_self_state_list_l_self_state_keys_126_exp_avg_ = l_self_state_list_l_self_state_keys_127_exp_avg_ = l_self_state_list_l_self_state_keys_128_exp_avg_ = l_self_state_list_l_self_state_keys_129_exp_avg_ = l_self_state_list_l_self_state_keys_130_exp_avg_ = l_self_state_list_l_self_state_keys_131_exp_avg_ = l_self_state_list_l_self_state_keys_132_exp_avg_ = l_self_state_list_l_self_state_keys_133_exp_avg_ = l_self_state_list_l_self_state_keys_134_exp_avg_ = l_self_state_list_l_self_state_keys_135_exp_avg_ = l_self_state_list_l_self_state_keys_136_exp_avg_ = l_self_state_list_l_self_state_keys_137_exp_avg_ = l_self_state_list_l_self_state_keys_138_exp_avg_ = l_self_state_list_l_self_state_keys_139_exp_avg_ = l_self_state_list_l_self_state_keys_140_exp_avg_ = l_self_state_list_l_self_state_keys_141_exp_avg_ = l_self_state_list_l_self_state_keys_142_exp_avg_ = l_self_state_list_l_self_state_keys_143_exp_avg_ = l_self_state_list_l_self_state_keys_144_exp_avg_ = l_self_state_list_l_self_state_keys_145_exp_avg_ = l_self_state_list_l_self_state_keys_146_exp_avg_ = l_self_state_list_l_self_state_keys_147_exp_avg_ = getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = 
getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = _foreach_addcdiv_ = None
+ return ()
+
+V0806 13:56:07.383000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9e669f5de4c8b0f7cdd129f8fcc83f7c"}
+ {
+ "name": "OutputGraph.call_user_compiler",
+ "ts": 1722977767383256.5,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:07.383000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2262e283850af4a8bd9532184526e3a4"}
+ {
+ "name": "backend_compile",
+ "ts": 1722977767383359.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:07.517000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "b9173e7748073b756e5d593bc739b193"}
+ {
+ "name": "create_aot_dispatcher_function",
+ "ts": 1722977767516968.2,
+ "args": null,
+ "ph": "B",
+ "pid": 0
+ }
+V0806 13:56:10.004000 4107173 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:200] {"aot_forward_graph": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "60e4dea494f30c4eb3ba47a5e0bbfd0d"}
+ class <lambda>(torch.nn.Module):
+ def forward(self, arg0_1: "f32[50304, 768][768, 1]cuda:0", arg1_1: "f32[1024, 768][768, 1]cuda:0", arg2_1: "f32[768][1]cuda:0", arg3_1: "f32[768][1]cuda:0", arg4_1: "f32[2304, 768][768, 1]cuda:0", arg5_1: "f32[2304][1]cuda:0", arg6_1: "f32[768, 768][768, 1]cuda:0", arg7_1: "f32[768][1]cuda:0", arg8_1: "f32[768][1]cuda:0", arg9_1: "f32[768][1]cuda:0", arg10_1: "f32[3072, 768][768, 1]cuda:0", arg11_1: "f32[3072][1]cuda:0", arg12_1: "f32[768, 3072][3072, 1]cuda:0", arg13_1:
"f32[768][1]cuda:0", arg14_1: "f32[768][1]cuda:0", arg15_1: "f32[768][1]cuda:0", arg16_1: "f32[2304, 768][768, 1]cuda:0", arg17_1: "f32[2304][1]cuda:0", arg18_1: "f32[768, 768][768, 1]cuda:0", arg19_1: "f32[768][1]cuda:0", arg20_1: "f32[768][1]cuda:0", arg21_1: "f32[768][1]cuda:0", arg22_1: "f32[3072, 768][768, 1]cuda:0", arg23_1: "f32[3072][1]cuda:0", arg24_1: "f32[768, 3072][3072, 1]cuda:0", arg25_1: "f32[768][1]cuda:0", arg26_1: "f32[768][1]cuda:0", arg27_1: "f32[768][1]cuda:0", arg28_1: "f32[2304, 768][768, 1]cuda:0", arg29_1: "f32[2304][1]cuda:0", arg30_1: "f32[768, 768][768, 1]cuda:0", arg31_1: "f32[768][1]cuda:0", arg32_1: "f32[768][1]cuda:0", arg33_1: "f32[768][1]cuda:0", arg34_1: "f32[3072, 768][768, 1]cuda:0", arg35_1: "f32[3072][1]cuda:0", arg36_1: "f32[768, 3072][3072, 1]cuda:0", arg37_1: "f32[768][1]cuda:0", arg38_1: "f32[768][1]cuda:0", arg39_1: "f32[768][1]cuda:0", arg40_1: "f32[2304, 768][768, 1]cuda:0", arg41_1: "f32[2304][1]cuda:0", arg42_1: "f32[768, 768][768, 1]cuda:0", arg43_1: "f32[768][1]cuda:0", arg44_1: "f32[768][1]cuda:0", arg45_1: "f32[768][1]cuda:0", arg46_1: "f32[3072, 768][768, 1]cuda:0", arg47_1: "f32[3072][1]cuda:0", arg48_1: "f32[768, 3072][3072, 1]cuda:0", arg49_1: "f32[768][1]cuda:0", arg50_1: "f32[768][1]cuda:0", arg51_1: "f32[768][1]cuda:0", arg52_1: "f32[2304, 768][768, 1]cuda:0", arg53_1: "f32[2304][1]cuda:0", arg54_1: "f32[768, 768][768, 1]cuda:0", arg55_1: "f32[768][1]cuda:0", arg56_1: "f32[768][1]cuda:0", arg57_1: "f32[768][1]cuda:0", arg58_1: "f32[3072, 768][768, 1]cuda:0", arg59_1: "f32[3072][1]cuda:0", arg60_1: "f32[768, 3072][3072, 1]cuda:0", arg61_1: "f32[768][1]cuda:0", arg62_1: "f32[768][1]cuda:0", arg63_1: "f32[768][1]cuda:0", arg64_1: "f32[2304, 768][768, 1]cuda:0", arg65_1: "f32[2304][1]cuda:0", arg66_1: "f32[768, 768][768, 1]cuda:0", arg67_1: "f32[768][1]cuda:0", arg68_1: "f32[768][1]cuda:0", arg69_1: "f32[768][1]cuda:0", arg70_1: "f32[3072, 768][768, 1]cuda:0", arg71_1: "f32[3072][1]cuda:0", arg72_1: "f32[768, 3072][3072, 1]cuda:0", arg73_1: "f32[768][1]cuda:0", arg74_1: "f32[768][1]cuda:0", arg75_1: "f32[768][1]cuda:0", arg76_1: "f32[2304, 768][768, 1]cuda:0", arg77_1: "f32[2304][1]cuda:0", arg78_1: "f32[768, 768][768, 1]cuda:0", arg79_1: "f32[768][1]cuda:0", arg80_1: "f32[768][1]cuda:0", arg81_1: "f32[768][1]cuda:0", arg82_1: "f32[3072, 768][768, 1]cuda:0", arg83_1: "f32[3072][1]cuda:0", arg84_1: "f32[768, 3072][3072, 1]cuda:0", arg85_1: "f32[768][1]cuda:0", arg86_1: "f32[768][1]cuda:0", arg87_1: "f32[768][1]cuda:0", arg88_1: "f32[2304, 768][768, 1]cuda:0", arg89_1: "f32[2304][1]cuda:0", arg90_1: "f32[768, 768][768, 1]cuda:0", arg91_1: "f32[768][1]cuda:0", arg92_1: "f32[768][1]cuda:0", arg93_1: "f32[768][1]cuda:0", arg94_1: "f32[3072, 768][768, 1]cuda:0", arg95_1: "f32[3072][1]cuda:0", arg96_1: "f32[768, 3072][3072, 1]cuda:0", arg97_1: "f32[768][1]cuda:0", arg98_1: "f32[768][1]cuda:0", arg99_1: "f32[768][1]cuda:0", arg100_1: "f32[2304, 768][768, 1]cuda:0", arg101_1: "f32[2304][1]cuda:0", arg102_1: "f32[768, 768][768, 1]cuda:0", arg103_1: "f32[768][1]cuda:0", arg104_1: "f32[768][1]cuda:0", arg105_1: "f32[768][1]cuda:0", arg106_1: "f32[3072, 768][768, 1]cuda:0", arg107_1: "f32[3072][1]cuda:0", arg108_1: "f32[768, 3072][3072, 1]cuda:0", arg109_1: "f32[768][1]cuda:0", arg110_1: "f32[768][1]cuda:0", arg111_1: "f32[768][1]cuda:0", arg112_1: "f32[2304, 768][768, 1]cuda:0", arg113_1: "f32[2304][1]cuda:0", arg114_1: "f32[768, 768][768, 1]cuda:0", arg115_1: "f32[768][1]cuda:0", arg116_1: "f32[768][1]cuda:0", arg117_1: "f32[768][1]cuda:0", 
arg118_1: "f32[3072, 768][768, 1]cuda:0", arg119_1: "f32[3072][1]cuda:0", arg120_1: "f32[768, 3072][3072, 1]cuda:0", arg121_1: "f32[768][1]cuda:0", arg122_1: "f32[768][1]cuda:0", arg123_1: "f32[768][1]cuda:0", arg124_1: "f32[2304, 768][768, 1]cuda:0", arg125_1: "f32[2304][1]cuda:0", arg126_1: "f32[768, 768][768, 1]cuda:0", arg127_1: "f32[768][1]cuda:0", arg128_1: "f32[768][1]cuda:0", arg129_1: "f32[768][1]cuda:0", arg130_1: "f32[3072, 768][768, 1]cuda:0", arg131_1: "f32[3072][1]cuda:0", arg132_1: "f32[768, 3072][3072, 1]cuda:0", arg133_1: "f32[768][1]cuda:0", arg134_1: "f32[768][1]cuda:0", arg135_1: "f32[768][1]cuda:0", arg136_1: "f32[2304, 768][768, 1]cuda:0", arg137_1: "f32[2304][1]cuda:0", arg138_1: "f32[768, 768][768, 1]cuda:0", arg139_1: "f32[768][1]cuda:0", arg140_1: "f32[768][1]cuda:0", arg141_1: "f32[768][1]cuda:0", arg142_1: "f32[3072, 768][768, 1]cuda:0", arg143_1: "f32[3072][1]cuda:0", arg144_1: "f32[768, 3072][3072, 1]cuda:0", arg145_1: "f32[768][1]cuda:0", arg146_1: "f32[768][1]cuda:0", arg147_1: "f32[768][1]cuda:0", arg148_1: "f32[][]cuda:0", arg149_1: "f32[1024, 768][768, 1]cuda:0", arg150_1: "f32[1024, 768][768, 1]cuda:0", arg151_1: "f32[50304, 768][768, 1]cuda:0", arg152_1: "f32[1024, 768][768, 1]cuda:0", arg153_1: "f32[768][1]cuda:0", arg154_1: "f32[768][1]cuda:0", arg155_1: "f32[2304, 768][768, 1]cuda:0", arg156_1: "f32[2304][1]cuda:0", arg157_1: "f32[768, 768][768, 1]cuda:0", arg158_1: "f32[768][1]cuda:0", arg159_1: "f32[768][1]cuda:0", arg160_1: "f32[768][1]cuda:0", arg161_1: "f32[3072, 768][768, 1]cuda:0", arg162_1: "f32[3072][1]cuda:0", arg163_1: "f32[768, 3072][3072, 1]cuda:0", arg164_1: "f32[768][1]cuda:0", arg165_1: "f32[768][1]cuda:0", arg166_1: "f32[768][1]cuda:0", arg167_1: "f32[2304, 768][768, 1]cuda:0", arg168_1: "f32[2304][1]cuda:0", arg169_1: "f32[768, 768][768, 1]cuda:0", arg170_1: "f32[768][1]cuda:0", arg171_1: "f32[768][1]cuda:0", arg172_1: "f32[768][1]cuda:0", arg173_1: "f32[3072, 768][768, 1]cuda:0", arg174_1: "f32[3072][1]cuda:0", arg175_1: "f32[768, 3072][3072, 1]cuda:0", arg176_1: "f32[768][1]cuda:0", arg177_1: "f32[768][1]cuda:0", arg178_1: "f32[768][1]cuda:0", arg179_1: "f32[2304, 768][768, 1]cuda:0", arg180_1: "f32[2304][1]cuda:0", arg181_1: "f32[768, 768][768, 1]cuda:0", arg182_1: "f32[768][1]cuda:0", arg183_1: "f32[768][1]cuda:0", arg184_1: "f32[768][1]cuda:0", arg185_1: "f32[3072, 768][768, 1]cuda:0", arg186_1: "f32[3072][1]cuda:0", arg187_1: "f32[768, 3072][3072, 1]cuda:0", arg188_1: "f32[768][1]cuda:0", arg189_1: "f32[768][1]cuda:0", arg190_1: "f32[768][1]cuda:0", arg191_1: "f32[2304, 768][768, 1]cuda:0", arg192_1: "f32[2304][1]cuda:0", arg193_1: "f32[768, 768][768, 1]cuda:0", arg194_1: "f32[768][1]cuda:0", arg195_1: "f32[768][1]cuda:0", arg196_1: "f32[768][1]cuda:0", arg197_1: "f32[3072, 768][768, 1]cuda:0", arg198_1: "f32[3072][1]cuda:0", arg199_1: "f32[768, 3072][3072, 1]cuda:0", arg200_1: "f32[768][1]cuda:0", arg201_1: "f32[768][1]cuda:0", arg202_1: "f32[768][1]cuda:0", arg203_1: "f32[2304, 768][768, 1]cuda:0", arg204_1: "f32[2304][1]cuda:0", arg205_1: "f32[768, 768][768, 1]cuda:0", arg206_1: "f32[768][1]cuda:0", arg207_1: "f32[768][1]cuda:0", arg208_1: "f32[768][1]cuda:0", arg209_1: "f32[3072, 768][768, 1]cuda:0", arg210_1: "f32[3072][1]cuda:0", arg211_1: "f32[768, 3072][3072, 1]cuda:0", arg212_1: "f32[768][1]cuda:0", arg213_1: "f32[768][1]cuda:0", arg214_1: "f32[768][1]cuda:0", arg215_1: "f32[2304, 768][768, 1]cuda:0", arg216_1: "f32[2304][1]cuda:0", arg217_1: "f32[768, 768][768, 1]cuda:0", arg218_1: "f32[768][1]cuda:0", arg219_1: 
"f32[768][1]cuda:0", arg220_1: "f32[768][1]cuda:0", arg221_1: "f32[3072, 768][768, 1]cuda:0", arg222_1: "f32[3072][1]cuda:0", arg223_1: "f32[768, 3072][3072, 1]cuda:0", arg224_1: "f32[768][1]cuda:0", arg225_1: "f32[768][1]cuda:0", arg226_1: "f32[768][1]cuda:0", arg227_1: "f32[2304, 768][768, 1]cuda:0", arg228_1: "f32[2304][1]cuda:0", arg229_1: "f32[768, 768][768, 1]cuda:0", arg230_1: "f32[768][1]cuda:0", arg231_1: "f32[768][1]cuda:0", arg232_1: "f32[768][1]cuda:0", arg233_1: "f32[3072, 768][768, 1]cuda:0", arg234_1: "f32[3072][1]cuda:0", arg235_1: "f32[768, 3072][3072, 1]cuda:0", arg236_1: "f32[768][1]cuda:0", arg237_1: "f32[768][1]cuda:0", arg238_1: "f32[768][1]cuda:0", arg239_1: "f32[2304, 768][768, 1]cuda:0", arg240_1: "f32[2304][1]cuda:0", arg241_1: "f32[768, 768][768, 1]cuda:0", arg242_1: "f32[768][1]cuda:0", arg243_1: "f32[768][1]cuda:0", arg244_1: "f32[768][1]cuda:0", arg245_1: "f32[3072, 768][768, 1]cuda:0", arg246_1: "f32[3072][1]cuda:0", arg247_1: "f32[768, 3072][3072, 1]cuda:0", arg248_1: "f32[768][1]cuda:0", arg249_1: "f32[768][1]cuda:0", arg250_1: "f32[768][1]cuda:0", arg251_1: "f32[2304, 768][768, 1]cuda:0", arg252_1: "f32[2304][1]cuda:0", arg253_1: "f32[768, 768][768, 1]cuda:0", arg254_1: "f32[768][1]cuda:0", arg255_1: "f32[768][1]cuda:0", arg256_1: "f32[768][1]cuda:0", arg257_1: "f32[3072, 768][768, 1]cuda:0", arg258_1: "f32[3072][1]cuda:0", arg259_1: "f32[768, 3072][3072, 1]cuda:0", arg260_1: "f32[768][1]cuda:0", arg261_1: "f32[768][1]cuda:0", arg262_1: "f32[768][1]cuda:0", arg263_1: "f32[2304, 768][768, 1]cuda:0", arg264_1: "f32[2304][1]cuda:0", arg265_1: "f32[768, 768][768, 1]cuda:0", arg266_1: "f32[768][1]cuda:0", arg267_1: "f32[768][1]cuda:0", arg268_1: "f32[768][1]cuda:0", arg269_1: "f32[3072, 768][768, 1]cuda:0", arg270_1: "f32[3072][1]cuda:0", arg271_1: "f32[768, 3072][3072, 1]cuda:0", arg272_1: "f32[768][1]cuda:0", arg273_1: "f32[768][1]cuda:0", arg274_1: "f32[768][1]cuda:0", arg275_1: "f32[2304, 768][768, 1]cuda:0", arg276_1: "f32[2304][1]cuda:0", arg277_1: "f32[768, 768][768, 1]cuda:0", arg278_1: "f32[768][1]cuda:0", arg279_1: "f32[768][1]cuda:0", arg280_1: "f32[768][1]cuda:0", arg281_1: "f32[3072, 768][768, 1]cuda:0", arg282_1: "f32[3072][1]cuda:0", arg283_1: "f32[768, 3072][3072, 1]cuda:0", arg284_1: "f32[768][1]cuda:0", arg285_1: "f32[768][1]cuda:0", arg286_1: "f32[768][1]cuda:0", arg287_1: "f32[2304, 768][768, 1]cuda:0", arg288_1: "f32[2304][1]cuda:0", arg289_1: "f32[768, 768][768, 1]cuda:0", arg290_1: "f32[768][1]cuda:0", arg291_1: "f32[768][1]cuda:0", arg292_1: "f32[768][1]cuda:0", arg293_1: "f32[3072, 768][768, 1]cuda:0", arg294_1: "f32[3072][1]cuda:0", arg295_1: "f32[768, 3072][3072, 1]cuda:0", arg296_1: "f32[768][1]cuda:0", arg297_1: "f32[768][1]cuda:0", arg298_1: "f32[768][1]cuda:0", arg299_1: "f32[50304, 768][768, 1]cuda:0", arg300_1: "f32[768][1]cuda:0", arg301_1: "f32[768][1]cuda:0", arg302_1: "f32[2304, 768][768, 1]cuda:0", arg303_1: "f32[2304][1]cuda:0", arg304_1: "f32[768, 768][768, 1]cuda:0", arg305_1: "f32[768][1]cuda:0", arg306_1: "f32[768][1]cuda:0", arg307_1: "f32[768][1]cuda:0", arg308_1: "f32[3072, 768][768, 1]cuda:0", arg309_1: "f32[3072][1]cuda:0", arg310_1: "f32[768, 3072][3072, 1]cuda:0", arg311_1: "f32[768][1]cuda:0", arg312_1: "f32[768][1]cuda:0", arg313_1: "f32[768][1]cuda:0", arg314_1: "f32[2304, 768][768, 1]cuda:0", arg315_1: "f32[2304][1]cuda:0", arg316_1: "f32[768, 768][768, 1]cuda:0", arg317_1: "f32[768][1]cuda:0", arg318_1: "f32[768][1]cuda:0", arg319_1: "f32[768][1]cuda:0", arg320_1: "f32[3072, 768][768, 1]cuda:0", arg321_1: 
"f32[3072][1]cuda:0", arg322_1: "f32[768, 3072][3072, 1]cuda:0", arg323_1: "f32[768][1]cuda:0", arg324_1: "f32[768][1]cuda:0", arg325_1: "f32[768][1]cuda:0", arg326_1: "f32[2304, 768][768, 1]cuda:0", arg327_1: "f32[2304][1]cuda:0", arg328_1: "f32[768, 768][768, 1]cuda:0", arg329_1: "f32[768][1]cuda:0", arg330_1: "f32[768][1]cuda:0", arg331_1: "f32[768][1]cuda:0", arg332_1: "f32[3072, 768][768, 1]cuda:0", arg333_1: "f32[3072][1]cuda:0", arg334_1: "f32[768, 3072][3072, 1]cuda:0", arg335_1: "f32[768][1]cuda:0", arg336_1: "f32[768][1]cuda:0", arg337_1: "f32[768][1]cuda:0", arg338_1: "f32[2304, 768][768, 1]cuda:0", arg339_1: "f32[2304][1]cuda:0", arg340_1: "f32[768, 768][768, 1]cuda:0", arg341_1: "f32[768][1]cuda:0", arg342_1: "f32[768][1]cuda:0", arg343_1: "f32[768][1]cuda:0", arg344_1: "f32[3072, 768][768, 1]cuda:0", arg345_1: "f32[3072][1]cuda:0", arg346_1: "f32[768, 3072][3072, 1]cuda:0", arg347_1: "f32[768][1]cuda:0", arg348_1: "f32[768][1]cuda:0", arg349_1: "f32[768][1]cuda:0", arg350_1: "f32[2304, 768][768, 1]cuda:0", arg351_1: "f32[2304][1]cuda:0", arg352_1: "f32[768, 768][768, 1]cuda:0", arg353_1: "f32[768][1]cuda:0", arg354_1: "f32[768][1]cuda:0", arg355_1: "f32[768][1]cuda:0", arg356_1: "f32[3072, 768][768, 1]cuda:0", arg357_1: "f32[3072][1]cuda:0", arg358_1: "f32[768, 3072][3072, 1]cuda:0", arg359_1: "f32[768][1]cuda:0", arg360_1: "f32[768][1]cuda:0", arg361_1: "f32[768][1]cuda:0", arg362_1: "f32[2304, 768][768, 1]cuda:0", arg363_1: "f32[2304][1]cuda:0", arg364_1: "f32[768, 768][768, 1]cuda:0", arg365_1: "f32[768][1]cuda:0", arg366_1: "f32[768][1]cuda:0", arg367_1: "f32[768][1]cuda:0", arg368_1: "f32[3072, 768][768, 1]cuda:0", arg369_1: "f32[3072][1]cuda:0", arg370_1: "f32[768, 3072][3072, 1]cuda:0", arg371_1: "f32[768][1]cuda:0", arg372_1: "f32[768][1]cuda:0", arg373_1: "f32[768][1]cuda:0", arg374_1: "f32[2304, 768][768, 1]cuda:0", arg375_1: "f32[2304][1]cuda:0", arg376_1: "f32[768, 768][768, 1]cuda:0", arg377_1: "f32[768][1]cuda:0", arg378_1: "f32[768][1]cuda:0", arg379_1: "f32[768][1]cuda:0", arg380_1: "f32[3072, 768][768, 1]cuda:0", arg381_1: "f32[3072][1]cuda:0", arg382_1: "f32[768, 3072][3072, 1]cuda:0", arg383_1: "f32[768][1]cuda:0", arg384_1: "f32[768][1]cuda:0", arg385_1: "f32[768][1]cuda:0", arg386_1: "f32[2304, 768][768, 1]cuda:0", arg387_1: "f32[2304][1]cuda:0", arg388_1: "f32[768, 768][768, 1]cuda:0", arg389_1: "f32[768][1]cuda:0", arg390_1: "f32[768][1]cuda:0", arg391_1: "f32[768][1]cuda:0", arg392_1: "f32[3072, 768][768, 1]cuda:0", arg393_1: "f32[3072][1]cuda:0", arg394_1: "f32[768, 3072][3072, 1]cuda:0", arg395_1: "f32[768][1]cuda:0", arg396_1: "f32[768][1]cuda:0", arg397_1: "f32[768][1]cuda:0", arg398_1: "f32[2304, 768][768, 1]cuda:0", arg399_1: "f32[2304][1]cuda:0", arg400_1: "f32[768, 768][768, 1]cuda:0", arg401_1: "f32[768][1]cuda:0", arg402_1: "f32[768][1]cuda:0", arg403_1: "f32[768][1]cuda:0", arg404_1: "f32[3072, 768][768, 1]cuda:0", arg405_1: "f32[3072][1]cuda:0", arg406_1: "f32[768, 3072][3072, 1]cuda:0", arg407_1: "f32[768][1]cuda:0", arg408_1: "f32[768][1]cuda:0", arg409_1: "f32[768][1]cuda:0", arg410_1: "f32[2304, 768][768, 1]cuda:0", arg411_1: "f32[2304][1]cuda:0", arg412_1: "f32[768, 768][768, 1]cuda:0", arg413_1: "f32[768][1]cuda:0", arg414_1: "f32[768][1]cuda:0", arg415_1: "f32[768][1]cuda:0", arg416_1: "f32[3072, 768][768, 1]cuda:0", arg417_1: "f32[3072][1]cuda:0", arg418_1: "f32[768, 3072][3072, 1]cuda:0", arg419_1: "f32[768][1]cuda:0", arg420_1: "f32[768][1]cuda:0", arg421_1: "f32[768][1]cuda:0", arg422_1: "f32[2304, 768][768, 1]cuda:0", arg423_1: 
"f32[2304][1]cuda:0", arg424_1: "f32[768, 768][768, 1]cuda:0", arg425_1: "f32[768][1]cuda:0", arg426_1: "f32[768][1]cuda:0", arg427_1: "f32[768][1]cuda:0", arg428_1: "f32[3072, 768][768, 1]cuda:0", arg429_1: "f32[3072][1]cuda:0", arg430_1: "f32[768, 3072][3072, 1]cuda:0", arg431_1: "f32[768][1]cuda:0", arg432_1: "f32[768][1]cuda:0", arg433_1: "f32[768][1]cuda:0", arg434_1: "f32[2304, 768][768, 1]cuda:0", arg435_1: "f32[2304][1]cuda:0", arg436_1: "f32[768, 768][768, 1]cuda:0", arg437_1: "f32[768][1]cuda:0", arg438_1: "f32[768][1]cuda:0", arg439_1: "f32[768][1]cuda:0", arg440_1: "f32[3072, 768][768, 1]cuda:0", arg441_1: "f32[3072][1]cuda:0", arg442_1: "f32[768, 3072][3072, 1]cuda:0", arg443_1: "f32[768][1]cuda:0", arg444_1: "f32[768][1]cuda:0", arg445_1: "f32[768][1]cuda:0", arg446_1: "f32[50304, 768][768, 1]cuda:0", arg447_1: "f32[768][1]cuda:0", arg448_1: "f32[768][1]cuda:0", arg449_1: "f32[2304, 768][768, 1]cuda:0", arg450_1: "f32[2304][1]cuda:0", arg451_1: "f32[768, 768][768, 1]cuda:0", arg452_1: "f32[768][1]cuda:0", arg453_1: "f32[768][1]cuda:0", arg454_1: "f32[768][1]cuda:0", arg455_1: "f32[3072, 768][768, 1]cuda:0", arg456_1: "f32[3072][1]cuda:0", arg457_1: "f32[768, 3072][3072, 1]cuda:0", arg458_1: "f32[768][1]cuda:0", arg459_1: "f32[768][1]cuda:0", arg460_1: "f32[768][1]cuda:0", arg461_1: "f32[2304, 768][768, 1]cuda:0", arg462_1: "f32[2304][1]cuda:0", arg463_1: "f32[768, 768][768, 1]cuda:0", arg464_1: "f32[768][1]cuda:0", arg465_1: "f32[768][1]cuda:0", arg466_1: "f32[768][1]cuda:0", arg467_1: "f32[3072, 768][768, 1]cuda:0", arg468_1: "f32[3072][1]cuda:0", arg469_1: "f32[768, 3072][3072, 1]cuda:0", arg470_1: "f32[768][1]cuda:0", arg471_1: "f32[768][1]cuda:0", arg472_1: "f32[768][1]cuda:0", arg473_1: "f32[2304, 768][768, 1]cuda:0", arg474_1: "f32[2304][1]cuda:0", arg475_1: "f32[768, 768][768, 1]cuda:0", arg476_1: "f32[768][1]cuda:0", arg477_1: "f32[768][1]cuda:0", arg478_1: "f32[768][1]cuda:0", arg479_1: "f32[3072, 768][768, 1]cuda:0", arg480_1: "f32[3072][1]cuda:0", arg481_1: "f32[768, 3072][3072, 1]cuda:0", arg482_1: "f32[768][1]cuda:0", arg483_1: "f32[768][1]cuda:0", arg484_1: "f32[768][1]cuda:0", arg485_1: "f32[2304, 768][768, 1]cuda:0", arg486_1: "f32[2304][1]cuda:0", arg487_1: "f32[768, 768][768, 1]cuda:0", arg488_1: "f32[768][1]cuda:0", arg489_1: "f32[768][1]cuda:0", arg490_1: "f32[768][1]cuda:0", arg491_1: "f32[3072, 768][768, 1]cuda:0", arg492_1: "f32[3072][1]cuda:0", arg493_1: "f32[768, 3072][3072, 1]cuda:0", arg494_1: "f32[768][1]cuda:0", arg495_1: "f32[768][1]cuda:0", arg496_1: "f32[768][1]cuda:0", arg497_1: "f32[2304, 768][768, 1]cuda:0", arg498_1: "f32[2304][1]cuda:0", arg499_1: "f32[768, 768][768, 1]cuda:0", arg500_1: "f32[768][1]cuda:0", arg501_1: "f32[768][1]cuda:0", arg502_1: "f32[768][1]cuda:0", arg503_1: "f32[3072, 768][768, 1]cuda:0", arg504_1: "f32[3072][1]cuda:0", arg505_1: "f32[768, 3072][3072, 1]cuda:0", arg506_1: "f32[768][1]cuda:0", arg507_1: "f32[768][1]cuda:0", arg508_1: "f32[768][1]cuda:0", arg509_1: "f32[2304, 768][768, 1]cuda:0", arg510_1: "f32[2304][1]cuda:0", arg511_1: "f32[768, 768][768, 1]cuda:0", arg512_1: "f32[768][1]cuda:0", arg513_1: "f32[768][1]cuda:0", arg514_1: "f32[768][1]cuda:0", arg515_1: "f32[3072, 768][768, 1]cuda:0", arg516_1: "f32[3072][1]cuda:0", arg517_1: "f32[768, 3072][3072, 1]cuda:0", arg518_1: "f32[768][1]cuda:0", arg519_1: "f32[768][1]cuda:0", arg520_1: "f32[768][1]cuda:0", arg521_1: "f32[2304, 768][768, 1]cuda:0", arg522_1: "f32[2304][1]cuda:0", arg523_1: "f32[768, 768][768, 1]cuda:0", arg524_1: "f32[768][1]cuda:0", arg525_1: 
"f32[768][1]cuda:0", arg526_1: "f32[768][1]cuda:0", arg527_1: "f32[3072, 768][768, 1]cuda:0", arg528_1: "f32[3072][1]cuda:0", arg529_1: "f32[768, 3072][3072, 1]cuda:0", arg530_1: "f32[768][1]cuda:0", arg531_1: "f32[768][1]cuda:0", arg532_1: "f32[768][1]cuda:0", arg533_1: "f32[2304, 768][768, 1]cuda:0", arg534_1: "f32[2304][1]cuda:0", arg535_1: "f32[768, 768][768, 1]cuda:0", arg536_1: "f32[768][1]cuda:0", arg537_1: "f32[768][1]cuda:0", arg538_1: "f32[768][1]cuda:0", arg539_1: "f32[3072, 768][768, 1]cuda:0", arg540_1: "f32[3072][1]cuda:0", arg541_1: "f32[768, 3072][3072, 1]cuda:0", arg542_1: "f32[768][1]cuda:0", arg543_1: "f32[768][1]cuda:0", arg544_1: "f32[768][1]cuda:0", arg545_1: "f32[2304, 768][768, 1]cuda:0", arg546_1: "f32[2304][1]cuda:0", arg547_1: "f32[768, 768][768, 1]cuda:0", arg548_1: "f32[768][1]cuda:0", arg549_1: "f32[768][1]cuda:0", arg550_1: "f32[768][1]cuda:0", arg551_1: "f32[3072, 768][768, 1]cuda:0", arg552_1: "f32[3072][1]cuda:0", arg553_1: "f32[768, 3072][3072, 1]cuda:0", arg554_1: "f32[768][1]cuda:0", arg555_1: "f32[768][1]cuda:0", arg556_1: "f32[768][1]cuda:0", arg557_1: "f32[2304, 768][768, 1]cuda:0", arg558_1: "f32[2304][1]cuda:0", arg559_1: "f32[768, 768][768, 1]cuda:0", arg560_1: "f32[768][1]cuda:0", arg561_1: "f32[768][1]cuda:0", arg562_1: "f32[768][1]cuda:0", arg563_1: "f32[3072, 768][768, 1]cuda:0", arg564_1: "f32[3072][1]cuda:0", arg565_1: "f32[768, 3072][3072, 1]cuda:0", arg566_1: "f32[768][1]cuda:0", arg567_1: "f32[768][1]cuda:0", arg568_1: "f32[768][1]cuda:0", arg569_1: "f32[2304, 768][768, 1]cuda:0", arg570_1: "f32[2304][1]cuda:0", arg571_1: "f32[768, 768][768, 1]cuda:0", arg572_1: "f32[768][1]cuda:0", arg573_1: "f32[768][1]cuda:0", arg574_1: "f32[768][1]cuda:0", arg575_1: "f32[3072, 768][768, 1]cuda:0", arg576_1: "f32[3072][1]cuda:0", arg577_1: "f32[768, 3072][3072, 1]cuda:0", arg578_1: "f32[768][1]cuda:0", arg579_1: "f32[768][1]cuda:0", arg580_1: "f32[768][1]cuda:0", arg581_1: "f32[2304, 768][768, 1]cuda:0", arg582_1: "f32[2304][1]cuda:0", arg583_1: "f32[768, 768][768, 1]cuda:0", arg584_1: "f32[768][1]cuda:0", arg585_1: "f32[768][1]cuda:0", arg586_1: "f32[768][1]cuda:0", arg587_1: "f32[3072, 768][768, 1]cuda:0", arg588_1: "f32[3072][1]cuda:0", arg589_1: "f32[768, 3072][3072, 1]cuda:0", arg590_1: "f32[768][1]cuda:0", arg591_1: "f32[768][1]cuda:0", arg592_1: "f32[768][1]cuda:0", arg593_1: "f32[][]cuda:0", arg594_1: "f32[][]cuda:0", arg595_1: "f32[][]cuda:0", arg596_1: "f32[][]cuda:0", arg597_1: "f32[][]cuda:0", arg598_1: "f32[][]cuda:0", arg599_1: "f32[][]cuda:0", arg600_1: "f32[][]cuda:0", arg601_1: "f32[][]cuda:0", arg602_1: "f32[][]cuda:0", arg603_1: "f32[][]cuda:0", arg604_1: "f32[][]cuda:0", arg605_1: "f32[][]cuda:0", arg606_1: "f32[][]cuda:0", arg607_1: "f32[][]cuda:0", arg608_1: "f32[][]cuda:0", arg609_1: "f32[][]cuda:0", arg610_1: "f32[][]cuda:0", arg611_1: "f32[][]cuda:0", arg612_1: "f32[][]cuda:0", arg613_1: "f32[][]cuda:0", arg614_1: "f32[][]cuda:0", arg615_1: "f32[][]cuda:0", arg616_1: "f32[][]cuda:0", arg617_1: "f32[][]cuda:0", arg618_1: "f32[][]cuda:0", arg619_1: "f32[][]cuda:0", arg620_1: "f32[][]cuda:0", arg621_1: "f32[][]cuda:0", arg622_1: "f32[][]cuda:0", arg623_1: "f32[][]cuda:0", arg624_1: "f32[][]cuda:0", arg625_1: "f32[][]cuda:0", arg626_1: "f32[][]cuda:0", arg627_1: "f32[][]cuda:0", arg628_1: "f32[][]cuda:0", arg629_1: "f32[][]cuda:0", arg630_1: "f32[][]cuda:0", arg631_1: "f32[][]cuda:0", arg632_1: "f32[][]cuda:0", arg633_1: "f32[][]cuda:0", arg634_1: "f32[][]cuda:0", arg635_1: "f32[][]cuda:0", arg636_1: "f32[][]cuda:0", arg637_1: 
"f32[][]cuda:0", arg638_1: "f32[][]cuda:0", arg639_1: "f32[][]cuda:0", arg640_1: "f32[][]cuda:0", arg641_1: "f32[][]cuda:0", arg642_1: "f32[][]cuda:0", arg643_1: "f32[][]cuda:0", arg644_1: "f32[][]cuda:0", arg645_1: "f32[][]cuda:0", arg646_1: "f32[][]cuda:0", arg647_1: "f32[][]cuda:0", arg648_1: "f32[][]cuda:0", arg649_1: "f32[][]cuda:0", arg650_1: "f32[][]cuda:0", arg651_1: "f32[][]cuda:0", arg652_1: "f32[][]cuda:0", arg653_1: "f32[][]cuda:0", arg654_1: "f32[][]cuda:0", arg655_1: "f32[][]cuda:0", arg656_1: "f32[][]cuda:0", arg657_1: "f32[][]cuda:0", arg658_1: "f32[][]cuda:0", arg659_1: "f32[][]cuda:0", arg660_1: "f32[][]cuda:0", arg661_1: "f32[][]cuda:0", arg662_1: "f32[][]cuda:0", arg663_1: "f32[][]cuda:0", arg664_1: "f32[][]cuda:0", arg665_1: "f32[][]cuda:0", arg666_1: "f32[][]cuda:0", arg667_1: "f32[][]cuda:0", arg668_1: "f32[][]cuda:0", arg669_1: "f32[][]cuda:0", arg670_1: "f32[][]cuda:0", arg671_1: "f32[][]cuda:0", arg672_1: "f32[][]cuda:0", arg673_1: "f32[][]cuda:0", arg674_1: "f32[][]cuda:0", arg675_1: "f32[][]cuda:0", arg676_1: "f32[][]cuda:0", arg677_1: "f32[][]cuda:0", arg678_1: "f32[][]cuda:0", arg679_1: "f32[][]cuda:0", arg680_1: "f32[][]cuda:0", arg681_1: "f32[][]cuda:0", arg682_1: "f32[][]cuda:0", arg683_1: "f32[][]cuda:0", arg684_1: "f32[][]cuda:0", arg685_1: "f32[][]cuda:0", arg686_1: "f32[][]cuda:0", arg687_1: "f32[][]cuda:0", arg688_1: "f32[][]cuda:0", arg689_1: "f32[][]cuda:0", arg690_1: "f32[][]cuda:0", arg691_1: "f32[][]cuda:0", arg692_1: "f32[][]cuda:0", arg693_1: "f32[][]cuda:0", arg694_1: "f32[][]cuda:0", arg695_1: "f32[][]cuda:0", arg696_1: "f32[][]cuda:0", arg697_1: "f32[][]cuda:0", arg698_1: "f32[][]cuda:0", arg699_1: "f32[][]cuda:0", arg700_1: "f32[][]cuda:0", arg701_1: "f32[][]cuda:0", arg702_1: "f32[][]cuda:0", arg703_1: "f32[][]cuda:0", arg704_1: "f32[][]cuda:0", arg705_1: "f32[][]cuda:0", arg706_1: "f32[][]cuda:0", arg707_1: "f32[][]cuda:0", arg708_1: "f32[][]cuda:0", arg709_1: "f32[][]cuda:0", arg710_1: "f32[][]cuda:0", arg711_1: "f32[][]cuda:0", arg712_1: "f32[][]cuda:0", arg713_1: "f32[][]cuda:0", arg714_1: "f32[][]cuda:0", arg715_1: "f32[][]cuda:0", arg716_1: "f32[][]cuda:0", arg717_1: "f32[][]cuda:0", arg718_1: "f32[][]cuda:0", arg719_1: "f32[][]cuda:0", arg720_1: "f32[][]cuda:0", arg721_1: "f32[][]cuda:0", arg722_1: "f32[][]cuda:0", arg723_1: "f32[][]cuda:0", arg724_1: "f32[][]cuda:0", arg725_1: "f32[][]cuda:0", arg726_1: "f32[][]cuda:0", arg727_1: "f32[][]cuda:0", arg728_1: "f32[][]cuda:0", arg729_1: "f32[][]cuda:0", arg730_1: "f32[][]cuda:0", arg731_1: "f32[][]cuda:0", arg732_1: "f32[][]cuda:0", arg733_1: "f32[][]cuda:0", arg734_1: "f32[][]cuda:0", arg735_1: "f32[][]cuda:0", arg736_1: "f32[][]cuda:0", arg737_1: "f32[][]cuda:0", arg738_1: "f32[][]cuda:0", arg739_1: "f32[][]cuda:0"): + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1) + _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, 
arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1) + getitem: "f32[][]cuda:0" = _foreach_add[0] + getitem_1: "f32[][]cuda:0" = _foreach_add[1] + getitem_2: "f32[][]cuda:0" = _foreach_add[2] + getitem_3: "f32[][]cuda:0" = _foreach_add[3] + getitem_4: "f32[][]cuda:0" = _foreach_add[4] + getitem_5: "f32[][]cuda:0" = _foreach_add[5] + getitem_6: "f32[][]cuda:0" = _foreach_add[6] + getitem_7: "f32[][]cuda:0" = _foreach_add[7] + getitem_8: "f32[][]cuda:0" = _foreach_add[8] + getitem_9: "f32[][]cuda:0" = _foreach_add[9] + getitem_10: "f32[][]cuda:0" = _foreach_add[10] + getitem_11: "f32[][]cuda:0" = _foreach_add[11] + getitem_12: "f32[][]cuda:0" = _foreach_add[12] + getitem_13: "f32[][]cuda:0" = _foreach_add[13] + getitem_14: "f32[][]cuda:0" = _foreach_add[14] + getitem_15: "f32[][]cuda:0" = _foreach_add[15] + getitem_16: "f32[][]cuda:0" = _foreach_add[16] + getitem_17: "f32[][]cuda:0" = _foreach_add[17] + getitem_18: "f32[][]cuda:0" = _foreach_add[18] + getitem_19: "f32[][]cuda:0" = _foreach_add[19] + getitem_20: "f32[][]cuda:0" = _foreach_add[20] + getitem_21: "f32[][]cuda:0" = _foreach_add[21] + getitem_22: "f32[][]cuda:0" = _foreach_add[22] + getitem_23: "f32[][]cuda:0" = _foreach_add[23] + getitem_24: "f32[][]cuda:0" = _foreach_add[24] + getitem_25: "f32[][]cuda:0" = _foreach_add[25] + getitem_26: "f32[][]cuda:0" = _foreach_add[26] + getitem_27: "f32[][]cuda:0" = _foreach_add[27] + getitem_28: "f32[][]cuda:0" = _foreach_add[28] + getitem_29: "f32[][]cuda:0" = _foreach_add[29] + getitem_30: "f32[][]cuda:0" = _foreach_add[30] + getitem_31: "f32[][]cuda:0" = _foreach_add[31] + getitem_32: "f32[][]cuda:0" = _foreach_add[32] + getitem_33: "f32[][]cuda:0" = _foreach_add[33] + getitem_34: "f32[][]cuda:0" = _foreach_add[34] + getitem_35: "f32[][]cuda:0" = _foreach_add[35] + getitem_36: "f32[][]cuda:0" = _foreach_add[36] + getitem_37: "f32[][]cuda:0" = _foreach_add[37] + getitem_38: "f32[][]cuda:0" = _foreach_add[38] + getitem_39: "f32[][]cuda:0" = _foreach_add[39] + getitem_40: "f32[][]cuda:0" = _foreach_add[40] + getitem_41: "f32[][]cuda:0" = _foreach_add[41] + getitem_42: "f32[][]cuda:0" = _foreach_add[42] + getitem_43: "f32[][]cuda:0" = _foreach_add[43] + getitem_44: "f32[][]cuda:0" = _foreach_add[44] + getitem_45: "f32[][]cuda:0" = _foreach_add[45] + getitem_46: "f32[][]cuda:0" = _foreach_add[46] + getitem_47: "f32[][]cuda:0" = _foreach_add[47] + getitem_48: "f32[][]cuda:0" = _foreach_add[48] + getitem_49: "f32[][]cuda:0" = _foreach_add[49] + getitem_50: "f32[][]cuda:0" = _foreach_add[50] + getitem_51: "f32[][]cuda:0" = _foreach_add[51] + getitem_52: "f32[][]cuda:0" = _foreach_add[52] + getitem_53: "f32[][]cuda:0" = _foreach_add[53] + getitem_54: "f32[][]cuda:0" = 
_foreach_add[54] + getitem_55: "f32[][]cuda:0" = _foreach_add[55] + getitem_56: "f32[][]cuda:0" = _foreach_add[56] + getitem_57: "f32[][]cuda:0" = _foreach_add[57] + getitem_58: "f32[][]cuda:0" = _foreach_add[58] + getitem_59: "f32[][]cuda:0" = _foreach_add[59] + getitem_60: "f32[][]cuda:0" = _foreach_add[60] + getitem_61: "f32[][]cuda:0" = _foreach_add[61] + getitem_62: "f32[][]cuda:0" = _foreach_add[62] + getitem_63: "f32[][]cuda:0" = _foreach_add[63] + getitem_64: "f32[][]cuda:0" = _foreach_add[64] + getitem_65: "f32[][]cuda:0" = _foreach_add[65] + getitem_66: "f32[][]cuda:0" = _foreach_add[66] + getitem_67: "f32[][]cuda:0" = _foreach_add[67] + getitem_68: "f32[][]cuda:0" = _foreach_add[68] + getitem_69: "f32[][]cuda:0" = _foreach_add[69] + getitem_70: "f32[][]cuda:0" = _foreach_add[70] + getitem_71: "f32[][]cuda:0" = _foreach_add[71] + getitem_72: "f32[][]cuda:0" = _foreach_add[72] + getitem_73: "f32[][]cuda:0" = _foreach_add[73] + getitem_74: "f32[][]cuda:0" = _foreach_add[74] + getitem_75: "f32[][]cuda:0" = _foreach_add[75] + getitem_76: "f32[][]cuda:0" = _foreach_add[76] + getitem_77: "f32[][]cuda:0" = _foreach_add[77] + getitem_78: "f32[][]cuda:0" = _foreach_add[78] + getitem_79: "f32[][]cuda:0" = _foreach_add[79] + getitem_80: "f32[][]cuda:0" = _foreach_add[80] + getitem_81: "f32[][]cuda:0" = _foreach_add[81] + getitem_82: "f32[][]cuda:0" = _foreach_add[82] + getitem_83: "f32[][]cuda:0" = _foreach_add[83] + getitem_84: "f32[][]cuda:0" = _foreach_add[84] + getitem_85: "f32[][]cuda:0" = _foreach_add[85] + getitem_86: "f32[][]cuda:0" = _foreach_add[86] + getitem_87: "f32[][]cuda:0" = _foreach_add[87] + getitem_88: "f32[][]cuda:0" = _foreach_add[88] + getitem_89: "f32[][]cuda:0" = _foreach_add[89] + getitem_90: "f32[][]cuda:0" = _foreach_add[90] + getitem_91: "f32[][]cuda:0" = _foreach_add[91] + getitem_92: "f32[][]cuda:0" = _foreach_add[92] + getitem_93: "f32[][]cuda:0" = _foreach_add[93] + getitem_94: "f32[][]cuda:0" = _foreach_add[94] + getitem_95: "f32[][]cuda:0" = _foreach_add[95] + getitem_96: "f32[][]cuda:0" = _foreach_add[96] + getitem_97: "f32[][]cuda:0" = _foreach_add[97] + getitem_98: "f32[][]cuda:0" = _foreach_add[98] + getitem_99: "f32[][]cuda:0" = _foreach_add[99] + getitem_100: "f32[][]cuda:0" = _foreach_add[100] + getitem_101: "f32[][]cuda:0" = _foreach_add[101] + getitem_102: "f32[][]cuda:0" = _foreach_add[102] + getitem_103: "f32[][]cuda:0" = _foreach_add[103] + getitem_104: "f32[][]cuda:0" = _foreach_add[104] + getitem_105: "f32[][]cuda:0" = _foreach_add[105] + getitem_106: "f32[][]cuda:0" = _foreach_add[106] + getitem_107: "f32[][]cuda:0" = _foreach_add[107] + getitem_108: "f32[][]cuda:0" = _foreach_add[108] + getitem_109: "f32[][]cuda:0" = _foreach_add[109] + getitem_110: "f32[][]cuda:0" = _foreach_add[110] + getitem_111: "f32[][]cuda:0" = _foreach_add[111] + getitem_112: "f32[][]cuda:0" = _foreach_add[112] + getitem_113: "f32[][]cuda:0" = _foreach_add[113] + getitem_114: "f32[][]cuda:0" = _foreach_add[114] + getitem_115: "f32[][]cuda:0" = _foreach_add[115] + getitem_116: "f32[][]cuda:0" = _foreach_add[116] + getitem_117: "f32[][]cuda:0" = _foreach_add[117] + getitem_118: "f32[][]cuda:0" = _foreach_add[118] + getitem_119: "f32[][]cuda:0" = _foreach_add[119] + getitem_120: "f32[][]cuda:0" = _foreach_add[120] + getitem_121: "f32[][]cuda:0" = _foreach_add[121] + getitem_122: "f32[][]cuda:0" = _foreach_add[122] + getitem_123: "f32[][]cuda:0" = _foreach_add[123] + getitem_124: "f32[][]cuda:0" = _foreach_add[124] + getitem_125: "f32[][]cuda:0" = _foreach_add[125] + 
getitem_126: "f32[][]cuda:0" = _foreach_add[126] + getitem_127: "f32[][]cuda:0" = _foreach_add[127] + getitem_128: "f32[][]cuda:0" = _foreach_add[128] + getitem_129: "f32[][]cuda:0" = _foreach_add[129] + getitem_130: "f32[][]cuda:0" = _foreach_add[130] + getitem_131: "f32[][]cuda:0" = _foreach_add[131] + getitem_132: "f32[][]cuda:0" = _foreach_add[132] + getitem_133: "f32[][]cuda:0" = _foreach_add[133] + getitem_134: "f32[][]cuda:0" = _foreach_add[134] + getitem_135: "f32[][]cuda:0" = _foreach_add[135] + getitem_136: "f32[][]cuda:0" = _foreach_add[136] + getitem_137: "f32[][]cuda:0" = _foreach_add[137] + getitem_138: "f32[][]cuda:0" = _foreach_add[138] + getitem_139: "f32[][]cuda:0" = _foreach_add[139] + getitem_140: "f32[][]cuda:0" = _foreach_add[140] + getitem_141: "f32[][]cuda:0" = _foreach_add[141] + getitem_142: "f32[][]cuda:0" = _foreach_add[142] + getitem_143: "f32[][]cuda:0" = _foreach_add[143] + getitem_144: "f32[][]cuda:0" = _foreach_add[144] + getitem_145: "f32[][]cuda:0" = _foreach_add[145] + getitem_146: "f32[][]cuda:0" = _foreach_add[146] + getitem_147: "f32[][]cuda:0" = _foreach_add[147]; _foreach_add = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1) + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, 
arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148: "f32[50304, 768][768, 1]cuda:0" = _foreach_sub[0] + getitem_149: "f32[1024, 768][768, 1]cuda:0" = _foreach_sub[1] + getitem_150: "f32[768][1]cuda:0" = _foreach_sub[2] + getitem_151: "f32[768][1]cuda:0" = _foreach_sub[3] + getitem_152: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[4] + getitem_153: "f32[2304][1]cuda:0" = _foreach_sub[5] + getitem_154: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[6] + getitem_155: "f32[768][1]cuda:0" = _foreach_sub[7] + getitem_156: "f32[768][1]cuda:0" = _foreach_sub[8] + getitem_157: "f32[768][1]cuda:0" = _foreach_sub[9] + getitem_158: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[10] + getitem_159: "f32[3072][1]cuda:0" = _foreach_sub[11] + getitem_160: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[12] + getitem_161: "f32[768][1]cuda:0" = _foreach_sub[13] + getitem_162: "f32[768][1]cuda:0" = _foreach_sub[14] + getitem_163: "f32[768][1]cuda:0" = _foreach_sub[15] + getitem_164: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[16] + getitem_165: "f32[2304][1]cuda:0" = _foreach_sub[17] + getitem_166: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[18] + getitem_167: "f32[768][1]cuda:0" = _foreach_sub[19] + getitem_168: "f32[768][1]cuda:0" = _foreach_sub[20] + getitem_169: "f32[768][1]cuda:0" = _foreach_sub[21] + getitem_170: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[22] + getitem_171: "f32[3072][1]cuda:0" = _foreach_sub[23] + getitem_172: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[24] + getitem_173: "f32[768][1]cuda:0" = _foreach_sub[25] + getitem_174: "f32[768][1]cuda:0" = _foreach_sub[26] + getitem_175: "f32[768][1]cuda:0" = _foreach_sub[27] + getitem_176: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[28] + getitem_177: "f32[2304][1]cuda:0" = _foreach_sub[29] + getitem_178: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[30] + getitem_179: "f32[768][1]cuda:0" = _foreach_sub[31] + getitem_180: "f32[768][1]cuda:0" = _foreach_sub[32] + getitem_181: "f32[768][1]cuda:0" = _foreach_sub[33] + getitem_182: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[34] + getitem_183: "f32[3072][1]cuda:0" = _foreach_sub[35] + getitem_184: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[36] + getitem_185: "f32[768][1]cuda:0" = _foreach_sub[37] + getitem_186: "f32[768][1]cuda:0" = _foreach_sub[38] + getitem_187: "f32[768][1]cuda:0" = _foreach_sub[39] + getitem_188: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[40] + getitem_189: "f32[2304][1]cuda:0" = _foreach_sub[41] + getitem_190: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[42] + getitem_191: "f32[768][1]cuda:0" = _foreach_sub[43] + getitem_192: "f32[768][1]cuda:0" = _foreach_sub[44] + getitem_193: "f32[768][1]cuda:0" = _foreach_sub[45] + getitem_194: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[46] + getitem_195: "f32[3072][1]cuda:0" = _foreach_sub[47] + getitem_196: 
"f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[48] + getitem_197: "f32[768][1]cuda:0" = _foreach_sub[49] + getitem_198: "f32[768][1]cuda:0" = _foreach_sub[50] + getitem_199: "f32[768][1]cuda:0" = _foreach_sub[51] + getitem_200: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[52] + getitem_201: "f32[2304][1]cuda:0" = _foreach_sub[53] + getitem_202: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[54] + getitem_203: "f32[768][1]cuda:0" = _foreach_sub[55] + getitem_204: "f32[768][1]cuda:0" = _foreach_sub[56] + getitem_205: "f32[768][1]cuda:0" = _foreach_sub[57] + getitem_206: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[58] + getitem_207: "f32[3072][1]cuda:0" = _foreach_sub[59] + getitem_208: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[60] + getitem_209: "f32[768][1]cuda:0" = _foreach_sub[61] + getitem_210: "f32[768][1]cuda:0" = _foreach_sub[62] + getitem_211: "f32[768][1]cuda:0" = _foreach_sub[63] + getitem_212: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[64] + getitem_213: "f32[2304][1]cuda:0" = _foreach_sub[65] + getitem_214: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[66] + getitem_215: "f32[768][1]cuda:0" = _foreach_sub[67] + getitem_216: "f32[768][1]cuda:0" = _foreach_sub[68] + getitem_217: "f32[768][1]cuda:0" = _foreach_sub[69] + getitem_218: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[70] + getitem_219: "f32[3072][1]cuda:0" = _foreach_sub[71] + getitem_220: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[72] + getitem_221: "f32[768][1]cuda:0" = _foreach_sub[73] + getitem_222: "f32[768][1]cuda:0" = _foreach_sub[74] + getitem_223: "f32[768][1]cuda:0" = _foreach_sub[75] + getitem_224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[76] + getitem_225: "f32[2304][1]cuda:0" = _foreach_sub[77] + getitem_226: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[78] + getitem_227: "f32[768][1]cuda:0" = _foreach_sub[79] + getitem_228: "f32[768][1]cuda:0" = _foreach_sub[80] + getitem_229: "f32[768][1]cuda:0" = _foreach_sub[81] + getitem_230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[82] + getitem_231: "f32[3072][1]cuda:0" = _foreach_sub[83] + getitem_232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[84] + getitem_233: "f32[768][1]cuda:0" = _foreach_sub[85] + getitem_234: "f32[768][1]cuda:0" = _foreach_sub[86] + getitem_235: "f32[768][1]cuda:0" = _foreach_sub[87] + getitem_236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[88] + getitem_237: "f32[2304][1]cuda:0" = _foreach_sub[89] + getitem_238: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[90] + getitem_239: "f32[768][1]cuda:0" = _foreach_sub[91] + getitem_240: "f32[768][1]cuda:0" = _foreach_sub[92] + getitem_241: "f32[768][1]cuda:0" = _foreach_sub[93] + getitem_242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[94] + getitem_243: "f32[3072][1]cuda:0" = _foreach_sub[95] + getitem_244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[96] + getitem_245: "f32[768][1]cuda:0" = _foreach_sub[97] + getitem_246: "f32[768][1]cuda:0" = _foreach_sub[98] + getitem_247: "f32[768][1]cuda:0" = _foreach_sub[99] + getitem_248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[100] + getitem_249: "f32[2304][1]cuda:0" = _foreach_sub[101] + getitem_250: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[102] + getitem_251: "f32[768][1]cuda:0" = _foreach_sub[103] + getitem_252: "f32[768][1]cuda:0" = _foreach_sub[104] + getitem_253: "f32[768][1]cuda:0" = _foreach_sub[105] + getitem_254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[106] + getitem_255: "f32[3072][1]cuda:0" = _foreach_sub[107] + getitem_256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[108] + getitem_257: 
"f32[768][1]cuda:0" = _foreach_sub[109] + getitem_258: "f32[768][1]cuda:0" = _foreach_sub[110] + getitem_259: "f32[768][1]cuda:0" = _foreach_sub[111] + getitem_260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[112] + getitem_261: "f32[2304][1]cuda:0" = _foreach_sub[113] + getitem_262: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[114] + getitem_263: "f32[768][1]cuda:0" = _foreach_sub[115] + getitem_264: "f32[768][1]cuda:0" = _foreach_sub[116] + getitem_265: "f32[768][1]cuda:0" = _foreach_sub[117] + getitem_266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[118] + getitem_267: "f32[3072][1]cuda:0" = _foreach_sub[119] + getitem_268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[120] + getitem_269: "f32[768][1]cuda:0" = _foreach_sub[121] + getitem_270: "f32[768][1]cuda:0" = _foreach_sub[122] + getitem_271: "f32[768][1]cuda:0" = _foreach_sub[123] + getitem_272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[124] + getitem_273: "f32[2304][1]cuda:0" = _foreach_sub[125] + getitem_274: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[126] + getitem_275: "f32[768][1]cuda:0" = _foreach_sub[127] + getitem_276: "f32[768][1]cuda:0" = _foreach_sub[128] + getitem_277: "f32[768][1]cuda:0" = _foreach_sub[129] + getitem_278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[130] + getitem_279: "f32[3072][1]cuda:0" = _foreach_sub[131] + getitem_280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[132] + getitem_281: "f32[768][1]cuda:0" = _foreach_sub[133] + getitem_282: "f32[768][1]cuda:0" = _foreach_sub[134] + getitem_283: "f32[768][1]cuda:0" = _foreach_sub[135] + getitem_284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[136] + getitem_285: "f32[2304][1]cuda:0" = _foreach_sub[137] + getitem_286: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[138] + getitem_287: "f32[768][1]cuda:0" = _foreach_sub[139] + getitem_288: "f32[768][1]cuda:0" = _foreach_sub[140] + getitem_289: "f32[768][1]cuda:0" = _foreach_sub[141] + getitem_290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[142] + getitem_291: "f32[3072][1]cuda:0" = _foreach_sub[143] + getitem_292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[144] + getitem_293: "f32[768][1]cuda:0" = _foreach_sub[145] + getitem_294: "f32[768][1]cuda:0" = _foreach_sub[146] + getitem_295: "f32[768][1]cuda:0" = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, 
getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul[0] + getitem_297: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul[1] + getitem_298: "f32[768][1]cuda:0" = _foreach_mul[2] + getitem_299: "f32[768][1]cuda:0" = _foreach_mul[3] + getitem_300: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[4] + getitem_301: "f32[2304][1]cuda:0" = _foreach_mul[5] + getitem_302: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[6] + getitem_303: "f32[768][1]cuda:0" = _foreach_mul[7] + getitem_304: "f32[768][1]cuda:0" = _foreach_mul[8] + getitem_305: "f32[768][1]cuda:0" = _foreach_mul[9] + getitem_306: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[10] + getitem_307: "f32[3072][1]cuda:0" = _foreach_mul[11] + getitem_308: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_mul[12] + getitem_309: "f32[768][1]cuda:0" = _foreach_mul[13] + getitem_310: "f32[768][1]cuda:0" = _foreach_mul[14] + getitem_311: "f32[768][1]cuda:0" = _foreach_mul[15] + getitem_312: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[16] + getitem_313: "f32[2304][1]cuda:0" = _foreach_mul[17] + getitem_314: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[18] + getitem_315: "f32[768][1]cuda:0" = _foreach_mul[19] + getitem_316: "f32[768][1]cuda:0" = _foreach_mul[20] + getitem_317: "f32[768][1]cuda:0" = _foreach_mul[21] + getitem_318: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[22] + getitem_319: "f32[3072][1]cuda:0" = _foreach_mul[23] + getitem_320: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[24] + getitem_321: "f32[768][1]cuda:0" = _foreach_mul[25] + getitem_322: "f32[768][1]cuda:0" = _foreach_mul[26] + getitem_323: "f32[768][1]cuda:0" = _foreach_mul[27] + getitem_324: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[28] + getitem_325: "f32[2304][1]cuda:0" = _foreach_mul[29] + getitem_326: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[30] + getitem_327: "f32[768][1]cuda:0" = _foreach_mul[31] + getitem_328: "f32[768][1]cuda:0" = _foreach_mul[32] + getitem_329: "f32[768][1]cuda:0" = _foreach_mul[33] + getitem_330: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[34] + getitem_331: "f32[3072][1]cuda:0" = _foreach_mul[35] + getitem_332: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[36] + getitem_333: "f32[768][1]cuda:0" = _foreach_mul[37] + getitem_334: "f32[768][1]cuda:0" = _foreach_mul[38] + getitem_335: "f32[768][1]cuda:0" = _foreach_mul[39] + getitem_336: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[40] + getitem_337: "f32[2304][1]cuda:0" = _foreach_mul[41] + getitem_338: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[42] + getitem_339: "f32[768][1]cuda:0" = _foreach_mul[43] + getitem_340: "f32[768][1]cuda:0" = _foreach_mul[44] + getitem_341: "f32[768][1]cuda:0" = _foreach_mul[45] + getitem_342: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[46] + getitem_343: "f32[3072][1]cuda:0" = _foreach_mul[47] + getitem_344: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[48] + getitem_345: "f32[768][1]cuda:0" = _foreach_mul[49] + getitem_346: "f32[768][1]cuda:0" = _foreach_mul[50] + getitem_347: "f32[768][1]cuda:0" = _foreach_mul[51] + getitem_348: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[52] + getitem_349: "f32[2304][1]cuda:0" = _foreach_mul[53] + getitem_350: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[54] + getitem_351: "f32[768][1]cuda:0" = _foreach_mul[55] + getitem_352: "f32[768][1]cuda:0" = _foreach_mul[56] + getitem_353: "f32[768][1]cuda:0" = _foreach_mul[57] + getitem_354: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[58] + getitem_355: "f32[3072][1]cuda:0" = _foreach_mul[59] + getitem_356: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[60] + getitem_357: "f32[768][1]cuda:0" = _foreach_mul[61] + getitem_358: "f32[768][1]cuda:0" = _foreach_mul[62] + getitem_359: "f32[768][1]cuda:0" = _foreach_mul[63] + getitem_360: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[64] + getitem_361: "f32[2304][1]cuda:0" = _foreach_mul[65] + getitem_362: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[66] + getitem_363: "f32[768][1]cuda:0" = _foreach_mul[67] + getitem_364: "f32[768][1]cuda:0" = _foreach_mul[68] + getitem_365: "f32[768][1]cuda:0" = _foreach_mul[69] + getitem_366: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[70] + getitem_367: "f32[3072][1]cuda:0" = _foreach_mul[71] + getitem_368: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[72] + getitem_369: "f32[768][1]cuda:0" = _foreach_mul[73] + 
getitem_370: "f32[768][1]cuda:0" = _foreach_mul[74] + getitem_371: "f32[768][1]cuda:0" = _foreach_mul[75] + getitem_372: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[76] + getitem_373: "f32[2304][1]cuda:0" = _foreach_mul[77] + getitem_374: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[78] + getitem_375: "f32[768][1]cuda:0" = _foreach_mul[79] + getitem_376: "f32[768][1]cuda:0" = _foreach_mul[80] + getitem_377: "f32[768][1]cuda:0" = _foreach_mul[81] + getitem_378: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[82] + getitem_379: "f32[3072][1]cuda:0" = _foreach_mul[83] + getitem_380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[84] + getitem_381: "f32[768][1]cuda:0" = _foreach_mul[85] + getitem_382: "f32[768][1]cuda:0" = _foreach_mul[86] + getitem_383: "f32[768][1]cuda:0" = _foreach_mul[87] + getitem_384: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[88] + getitem_385: "f32[2304][1]cuda:0" = _foreach_mul[89] + getitem_386: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[90] + getitem_387: "f32[768][1]cuda:0" = _foreach_mul[91] + getitem_388: "f32[768][1]cuda:0" = _foreach_mul[92] + getitem_389: "f32[768][1]cuda:0" = _foreach_mul[93] + getitem_390: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[94] + getitem_391: "f32[3072][1]cuda:0" = _foreach_mul[95] + getitem_392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[96] + getitem_393: "f32[768][1]cuda:0" = _foreach_mul[97] + getitem_394: "f32[768][1]cuda:0" = _foreach_mul[98] + getitem_395: "f32[768][1]cuda:0" = _foreach_mul[99] + getitem_396: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[100] + getitem_397: "f32[2304][1]cuda:0" = _foreach_mul[101] + getitem_398: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[102] + getitem_399: "f32[768][1]cuda:0" = _foreach_mul[103] + getitem_400: "f32[768][1]cuda:0" = _foreach_mul[104] + getitem_401: "f32[768][1]cuda:0" = _foreach_mul[105] + getitem_402: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[106] + getitem_403: "f32[3072][1]cuda:0" = _foreach_mul[107] + getitem_404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[108] + getitem_405: "f32[768][1]cuda:0" = _foreach_mul[109] + getitem_406: "f32[768][1]cuda:0" = _foreach_mul[110] + getitem_407: "f32[768][1]cuda:0" = _foreach_mul[111] + getitem_408: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[112] + getitem_409: "f32[2304][1]cuda:0" = _foreach_mul[113] + getitem_410: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[114] + getitem_411: "f32[768][1]cuda:0" = _foreach_mul[115] + getitem_412: "f32[768][1]cuda:0" = _foreach_mul[116] + getitem_413: "f32[768][1]cuda:0" = _foreach_mul[117] + getitem_414: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[118] + getitem_415: "f32[3072][1]cuda:0" = _foreach_mul[119] + getitem_416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[120] + getitem_417: "f32[768][1]cuda:0" = _foreach_mul[121] + getitem_418: "f32[768][1]cuda:0" = _foreach_mul[122] + getitem_419: "f32[768][1]cuda:0" = _foreach_mul[123] + getitem_420: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[124] + getitem_421: "f32[2304][1]cuda:0" = _foreach_mul[125] + getitem_422: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[126] + getitem_423: "f32[768][1]cuda:0" = _foreach_mul[127] + getitem_424: "f32[768][1]cuda:0" = _foreach_mul[128] + getitem_425: "f32[768][1]cuda:0" = _foreach_mul[129] + getitem_426: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[130] + getitem_427: "f32[3072][1]cuda:0" = _foreach_mul[131] + getitem_428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[132] + getitem_429: "f32[768][1]cuda:0" = _foreach_mul[133] + getitem_430: "f32[768][1]cuda:0" = 
_foreach_mul[134] + getitem_431: "f32[768][1]cuda:0" = _foreach_mul[135] + getitem_432: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[136] + getitem_433: "f32[2304][1]cuda:0" = _foreach_mul[137] + getitem_434: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[138] + getitem_435: "f32[768][1]cuda:0" = _foreach_mul[139] + getitem_436: "f32[768][1]cuda:0" = _foreach_mul[140] + getitem_437: "f32[768][1]cuda:0" = _foreach_mul[141] + getitem_438: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[142] + getitem_439: "f32[3072][1]cuda:0" = _foreach_mul[143] + getitem_440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[144] + getitem_441: "f32[768][1]cuda:0" = _foreach_mul[145] + getitem_442: "f32[768][1]cuda:0" = _foreach_mul[146] + getitem_443: "f32[768][1]cuda:0" = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, 
getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_1[0] + getitem_445: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_1[1] + getitem_446: "f32[768][1]cuda:0" = _foreach_add_1[2] + getitem_447: "f32[768][1]cuda:0" = _foreach_add_1[3] + getitem_448: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[4] + getitem_449: "f32[2304][1]cuda:0" = _foreach_add_1[5] + getitem_450: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[6] + getitem_451: "f32[768][1]cuda:0" = _foreach_add_1[7] + getitem_452: "f32[768][1]cuda:0" = _foreach_add_1[8] + getitem_453: "f32[768][1]cuda:0" = _foreach_add_1[9] + getitem_454: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[10] + getitem_455: "f32[3072][1]cuda:0" = _foreach_add_1[11] + getitem_456: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_add_1[12] + getitem_457: "f32[768][1]cuda:0" = _foreach_add_1[13] + getitem_458: "f32[768][1]cuda:0" = _foreach_add_1[14] + getitem_459: "f32[768][1]cuda:0" = _foreach_add_1[15] + getitem_460: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[16] + getitem_461: "f32[2304][1]cuda:0" = _foreach_add_1[17] + getitem_462: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[18] + getitem_463: "f32[768][1]cuda:0" = _foreach_add_1[19] + getitem_464: "f32[768][1]cuda:0" = _foreach_add_1[20] + getitem_465: "f32[768][1]cuda:0" = _foreach_add_1[21] + getitem_466: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[22] + getitem_467: "f32[3072][1]cuda:0" = _foreach_add_1[23] + getitem_468: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[24] + getitem_469: "f32[768][1]cuda:0" = _foreach_add_1[25] + getitem_470: "f32[768][1]cuda:0" = _foreach_add_1[26] + getitem_471: "f32[768][1]cuda:0" = _foreach_add_1[27] + getitem_472: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[28] + getitem_473: "f32[2304][1]cuda:0" = _foreach_add_1[29] + getitem_474: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[30] + getitem_475: "f32[768][1]cuda:0" = _foreach_add_1[31] + getitem_476: "f32[768][1]cuda:0" = _foreach_add_1[32] + getitem_477: "f32[768][1]cuda:0" = _foreach_add_1[33] + getitem_478: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[34] + getitem_479: "f32[3072][1]cuda:0" = _foreach_add_1[35] + getitem_480: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[36] + getitem_481: "f32[768][1]cuda:0" = _foreach_add_1[37] + getitem_482: "f32[768][1]cuda:0" = _foreach_add_1[38] + getitem_483: "f32[768][1]cuda:0" = _foreach_add_1[39] + getitem_484: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[40] + getitem_485: "f32[2304][1]cuda:0" = _foreach_add_1[41] + getitem_486: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[42] + getitem_487: "f32[768][1]cuda:0" = _foreach_add_1[43] + getitem_488: "f32[768][1]cuda:0" = _foreach_add_1[44] + getitem_489: "f32[768][1]cuda:0" = _foreach_add_1[45] + getitem_490: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[46] + getitem_491: "f32[3072][1]cuda:0" = _foreach_add_1[47] + getitem_492: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[48] + getitem_493: "f32[768][1]cuda:0" = _foreach_add_1[49] + getitem_494: "f32[768][1]cuda:0" = _foreach_add_1[50] + getitem_495: "f32[768][1]cuda:0" = _foreach_add_1[51] + getitem_496: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[52] + getitem_497: "f32[2304][1]cuda:0" = _foreach_add_1[53] + getitem_498: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[54] + getitem_499: "f32[768][1]cuda:0" = _foreach_add_1[55] + getitem_500: "f32[768][1]cuda:0" = _foreach_add_1[56] + getitem_501: "f32[768][1]cuda:0" = _foreach_add_1[57] + getitem_502: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[58] + getitem_503: "f32[3072][1]cuda:0" = _foreach_add_1[59] + getitem_504: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[60] + getitem_505: "f32[768][1]cuda:0" = _foreach_add_1[61] + getitem_506: "f32[768][1]cuda:0" = _foreach_add_1[62] + getitem_507: "f32[768][1]cuda:0" = _foreach_add_1[63] + getitem_508: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[64] + getitem_509: "f32[2304][1]cuda:0" = _foreach_add_1[65] + getitem_510: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[66] + getitem_511: "f32[768][1]cuda:0" = _foreach_add_1[67] + getitem_512: "f32[768][1]cuda:0" = _foreach_add_1[68] + getitem_513: "f32[768][1]cuda:0" = _foreach_add_1[69] + getitem_514: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[70] + getitem_515: "f32[3072][1]cuda:0" = _foreach_add_1[71] + 
getitem_516: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[72] + getitem_517: "f32[768][1]cuda:0" = _foreach_add_1[73] + getitem_518: "f32[768][1]cuda:0" = _foreach_add_1[74] + getitem_519: "f32[768][1]cuda:0" = _foreach_add_1[75] + getitem_520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[76] + getitem_521: "f32[2304][1]cuda:0" = _foreach_add_1[77] + getitem_522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[78] + getitem_523: "f32[768][1]cuda:0" = _foreach_add_1[79] + getitem_524: "f32[768][1]cuda:0" = _foreach_add_1[80] + getitem_525: "f32[768][1]cuda:0" = _foreach_add_1[81] + getitem_526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[82] + getitem_527: "f32[3072][1]cuda:0" = _foreach_add_1[83] + getitem_528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[84] + getitem_529: "f32[768][1]cuda:0" = _foreach_add_1[85] + getitem_530: "f32[768][1]cuda:0" = _foreach_add_1[86] + getitem_531: "f32[768][1]cuda:0" = _foreach_add_1[87] + getitem_532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[88] + getitem_533: "f32[2304][1]cuda:0" = _foreach_add_1[89] + getitem_534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[90] + getitem_535: "f32[768][1]cuda:0" = _foreach_add_1[91] + getitem_536: "f32[768][1]cuda:0" = _foreach_add_1[92] + getitem_537: "f32[768][1]cuda:0" = _foreach_add_1[93] + getitem_538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[94] + getitem_539: "f32[3072][1]cuda:0" = _foreach_add_1[95] + getitem_540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[96] + getitem_541: "f32[768][1]cuda:0" = _foreach_add_1[97] + getitem_542: "f32[768][1]cuda:0" = _foreach_add_1[98] + getitem_543: "f32[768][1]cuda:0" = _foreach_add_1[99] + getitem_544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[100] + getitem_545: "f32[2304][1]cuda:0" = _foreach_add_1[101] + getitem_546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[102] + getitem_547: "f32[768][1]cuda:0" = _foreach_add_1[103] + getitem_548: "f32[768][1]cuda:0" = _foreach_add_1[104] + getitem_549: "f32[768][1]cuda:0" = _foreach_add_1[105] + getitem_550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[106] + getitem_551: "f32[3072][1]cuda:0" = _foreach_add_1[107] + getitem_552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[108] + getitem_553: "f32[768][1]cuda:0" = _foreach_add_1[109] + getitem_554: "f32[768][1]cuda:0" = _foreach_add_1[110] + getitem_555: "f32[768][1]cuda:0" = _foreach_add_1[111] + getitem_556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[112] + getitem_557: "f32[2304][1]cuda:0" = _foreach_add_1[113] + getitem_558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[114] + getitem_559: "f32[768][1]cuda:0" = _foreach_add_1[115] + getitem_560: "f32[768][1]cuda:0" = _foreach_add_1[116] + getitem_561: "f32[768][1]cuda:0" = _foreach_add_1[117] + getitem_562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[118] + getitem_563: "f32[3072][1]cuda:0" = _foreach_add_1[119] + getitem_564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[120] + getitem_565: "f32[768][1]cuda:0" = _foreach_add_1[121] + getitem_566: "f32[768][1]cuda:0" = _foreach_add_1[122] + getitem_567: "f32[768][1]cuda:0" = _foreach_add_1[123] + getitem_568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[124] + getitem_569: "f32[2304][1]cuda:0" = _foreach_add_1[125] + getitem_570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[126] + getitem_571: "f32[768][1]cuda:0" = _foreach_add_1[127] + getitem_572: "f32[768][1]cuda:0" = _foreach_add_1[128] + getitem_573: "f32[768][1]cuda:0" = _foreach_add_1[129] + getitem_574: "f32[3072, 768][768, 1]cuda:0" = 
_foreach_add_1[130] + getitem_575: "f32[3072][1]cuda:0" = _foreach_add_1[131] + getitem_576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[132] + getitem_577: "f32[768][1]cuda:0" = _foreach_add_1[133] + getitem_578: "f32[768][1]cuda:0" = _foreach_add_1[134] + getitem_579: "f32[768][1]cuda:0" = _foreach_add_1[135] + getitem_580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[136] + getitem_581: "f32[2304][1]cuda:0" = _foreach_add_1[137] + getitem_582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[138] + getitem_583: "f32[768][1]cuda:0" = _foreach_add_1[139] + getitem_584: "f32[768][1]cuda:0" = _foreach_add_1[140] + getitem_585: "f32[768][1]cuda:0" = _foreach_add_1[141] + getitem_586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[142] + getitem_587: "f32[3072][1]cuda:0" = _foreach_add_1[143] + getitem_588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[144] + getitem_589: "f32[768][1]cuda:0" = _foreach_add_1[145] + getitem_590: "f32[768][1]cuda:0" = _foreach_add_1[146] + getitem_591: "f32[768][1]cuda:0" = _foreach_add_1[147]; _foreach_add_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2) + _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_1[0] + getitem_593: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_1[1] + getitem_594: "f32[768][1]cuda:0" = _foreach_mul_1[2] + getitem_595: "f32[768][1]cuda:0" = _foreach_mul_1[3] + getitem_596: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[4] + getitem_597: "f32[2304][1]cuda:0" = _foreach_mul_1[5] + getitem_598: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[6] + getitem_599: "f32[768][1]cuda:0" = _foreach_mul_1[7] + getitem_600: "f32[768][1]cuda:0" = _foreach_mul_1[8] + getitem_601: "f32[768][1]cuda:0" = _foreach_mul_1[9] + getitem_602: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[10] + getitem_603: "f32[3072][1]cuda:0" = _foreach_mul_1[11] + getitem_604: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[12] + getitem_605: 
"f32[768][1]cuda:0" = _foreach_mul_1[13] + getitem_606: "f32[768][1]cuda:0" = _foreach_mul_1[14] + getitem_607: "f32[768][1]cuda:0" = _foreach_mul_1[15] + getitem_608: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[16] + getitem_609: "f32[2304][1]cuda:0" = _foreach_mul_1[17] + getitem_610: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[18] + getitem_611: "f32[768][1]cuda:0" = _foreach_mul_1[19] + getitem_612: "f32[768][1]cuda:0" = _foreach_mul_1[20] + getitem_613: "f32[768][1]cuda:0" = _foreach_mul_1[21] + getitem_614: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[22] + getitem_615: "f32[3072][1]cuda:0" = _foreach_mul_1[23] + getitem_616: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[24] + getitem_617: "f32[768][1]cuda:0" = _foreach_mul_1[25] + getitem_618: "f32[768][1]cuda:0" = _foreach_mul_1[26] + getitem_619: "f32[768][1]cuda:0" = _foreach_mul_1[27] + getitem_620: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[28] + getitem_621: "f32[2304][1]cuda:0" = _foreach_mul_1[29] + getitem_622: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[30] + getitem_623: "f32[768][1]cuda:0" = _foreach_mul_1[31] + getitem_624: "f32[768][1]cuda:0" = _foreach_mul_1[32] + getitem_625: "f32[768][1]cuda:0" = _foreach_mul_1[33] + getitem_626: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[34] + getitem_627: "f32[3072][1]cuda:0" = _foreach_mul_1[35] + getitem_628: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[36] + getitem_629: "f32[768][1]cuda:0" = _foreach_mul_1[37] + getitem_630: "f32[768][1]cuda:0" = _foreach_mul_1[38] + getitem_631: "f32[768][1]cuda:0" = _foreach_mul_1[39] + getitem_632: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[40] + getitem_633: "f32[2304][1]cuda:0" = _foreach_mul_1[41] + getitem_634: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[42] + getitem_635: "f32[768][1]cuda:0" = _foreach_mul_1[43] + getitem_636: "f32[768][1]cuda:0" = _foreach_mul_1[44] + getitem_637: "f32[768][1]cuda:0" = _foreach_mul_1[45] + getitem_638: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[46] + getitem_639: "f32[3072][1]cuda:0" = _foreach_mul_1[47] + getitem_640: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[48] + getitem_641: "f32[768][1]cuda:0" = _foreach_mul_1[49] + getitem_642: "f32[768][1]cuda:0" = _foreach_mul_1[50] + getitem_643: "f32[768][1]cuda:0" = _foreach_mul_1[51] + getitem_644: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[52] + getitem_645: "f32[2304][1]cuda:0" = _foreach_mul_1[53] + getitem_646: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[54] + getitem_647: "f32[768][1]cuda:0" = _foreach_mul_1[55] + getitem_648: "f32[768][1]cuda:0" = _foreach_mul_1[56] + getitem_649: "f32[768][1]cuda:0" = _foreach_mul_1[57] + getitem_650: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[58] + getitem_651: "f32[3072][1]cuda:0" = _foreach_mul_1[59] + getitem_652: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[60] + getitem_653: "f32[768][1]cuda:0" = _foreach_mul_1[61] + getitem_654: "f32[768][1]cuda:0" = _foreach_mul_1[62] + getitem_655: "f32[768][1]cuda:0" = _foreach_mul_1[63] + getitem_656: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[64] + getitem_657: "f32[2304][1]cuda:0" = _foreach_mul_1[65] + getitem_658: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[66] + getitem_659: "f32[768][1]cuda:0" = _foreach_mul_1[67] + getitem_660: "f32[768][1]cuda:0" = _foreach_mul_1[68] + getitem_661: "f32[768][1]cuda:0" = _foreach_mul_1[69] + getitem_662: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[70] + getitem_663: "f32[3072][1]cuda:0" = _foreach_mul_1[71] + getitem_664: "f32[768, 3072][3072, 1]cuda:0" 
= _foreach_mul_1[72] + getitem_665: "f32[768][1]cuda:0" = _foreach_mul_1[73] + getitem_666: "f32[768][1]cuda:0" = _foreach_mul_1[74] + getitem_667: "f32[768][1]cuda:0" = _foreach_mul_1[75] + getitem_668: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[76] + getitem_669: "f32[2304][1]cuda:0" = _foreach_mul_1[77] + getitem_670: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[78] + getitem_671: "f32[768][1]cuda:0" = _foreach_mul_1[79] + getitem_672: "f32[768][1]cuda:0" = _foreach_mul_1[80] + getitem_673: "f32[768][1]cuda:0" = _foreach_mul_1[81] + getitem_674: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[82] + getitem_675: "f32[3072][1]cuda:0" = _foreach_mul_1[83] + getitem_676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[84] + getitem_677: "f32[768][1]cuda:0" = _foreach_mul_1[85] + getitem_678: "f32[768][1]cuda:0" = _foreach_mul_1[86] + getitem_679: "f32[768][1]cuda:0" = _foreach_mul_1[87] + getitem_680: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[88] + getitem_681: "f32[2304][1]cuda:0" = _foreach_mul_1[89] + getitem_682: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[90] + getitem_683: "f32[768][1]cuda:0" = _foreach_mul_1[91] + getitem_684: "f32[768][1]cuda:0" = _foreach_mul_1[92] + getitem_685: "f32[768][1]cuda:0" = _foreach_mul_1[93] + getitem_686: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[94] + getitem_687: "f32[3072][1]cuda:0" = _foreach_mul_1[95] + getitem_688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[96] + getitem_689: "f32[768][1]cuda:0" = _foreach_mul_1[97] + getitem_690: "f32[768][1]cuda:0" = _foreach_mul_1[98] + getitem_691: "f32[768][1]cuda:0" = _foreach_mul_1[99] + getitem_692: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[100] + getitem_693: "f32[2304][1]cuda:0" = _foreach_mul_1[101] + getitem_694: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[102] + getitem_695: "f32[768][1]cuda:0" = _foreach_mul_1[103] + getitem_696: "f32[768][1]cuda:0" = _foreach_mul_1[104] + getitem_697: "f32[768][1]cuda:0" = _foreach_mul_1[105] + getitem_698: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[106] + getitem_699: "f32[3072][1]cuda:0" = _foreach_mul_1[107] + getitem_700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[108] + getitem_701: "f32[768][1]cuda:0" = _foreach_mul_1[109] + getitem_702: "f32[768][1]cuda:0" = _foreach_mul_1[110] + getitem_703: "f32[768][1]cuda:0" = _foreach_mul_1[111] + getitem_704: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[112] + getitem_705: "f32[2304][1]cuda:0" = _foreach_mul_1[113] + getitem_706: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[114] + getitem_707: "f32[768][1]cuda:0" = _foreach_mul_1[115] + getitem_708: "f32[768][1]cuda:0" = _foreach_mul_1[116] + getitem_709: "f32[768][1]cuda:0" = _foreach_mul_1[117] + getitem_710: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[118] + getitem_711: "f32[3072][1]cuda:0" = _foreach_mul_1[119] + getitem_712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[120] + getitem_713: "f32[768][1]cuda:0" = _foreach_mul_1[121] + getitem_714: "f32[768][1]cuda:0" = _foreach_mul_1[122] + getitem_715: "f32[768][1]cuda:0" = _foreach_mul_1[123] + getitem_716: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[124] + getitem_717: "f32[2304][1]cuda:0" = _foreach_mul_1[125] + getitem_718: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[126] + getitem_719: "f32[768][1]cuda:0" = _foreach_mul_1[127] + getitem_720: "f32[768][1]cuda:0" = _foreach_mul_1[128] + getitem_721: "f32[768][1]cuda:0" = _foreach_mul_1[129] + getitem_722: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[130] + getitem_723: "f32[3072][1]cuda:0" = 
_foreach_mul_1[131] + getitem_724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[132] + getitem_725: "f32[768][1]cuda:0" = _foreach_mul_1[133] + getitem_726: "f32[768][1]cuda:0" = _foreach_mul_1[134] + getitem_727: "f32[768][1]cuda:0" = _foreach_mul_1[135] + getitem_728: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[136] + getitem_729: "f32[2304][1]cuda:0" = _foreach_mul_1[137] + getitem_730: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[138] + getitem_731: "f32[768][1]cuda:0" = _foreach_mul_1[139] + getitem_732: "f32[768][1]cuda:0" = _foreach_mul_1[140] + getitem_733: "f32[768][1]cuda:0" = _foreach_mul_1[141] + getitem_734: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[142] + getitem_735: "f32[3072][1]cuda:0" = _foreach_mul_1[143] + getitem_736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[144] + getitem_737: "f32[768][1]cuda:0" = _foreach_mul_1[145] + getitem_738: "f32[768][1]cuda:0" = _foreach_mul_1[146] + getitem_739: "f32[768][1]cuda:0" = _foreach_mul_1[147]; _foreach_mul_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, 
arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_2[0] + getitem_741: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_2[1] + getitem_742: "f32[768][1]cuda:0" = _foreach_mul_2[2] + getitem_743: "f32[768][1]cuda:0" = _foreach_mul_2[3] + getitem_744: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[4] + getitem_745: "f32[2304][1]cuda:0" = _foreach_mul_2[5] + getitem_746: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[6] + getitem_747: "f32[768][1]cuda:0" = _foreach_mul_2[7] + getitem_748: "f32[768][1]cuda:0" = _foreach_mul_2[8] + getitem_749: "f32[768][1]cuda:0" = _foreach_mul_2[9] + getitem_750: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[10] + getitem_751: "f32[3072][1]cuda:0" = _foreach_mul_2[11] + getitem_752: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[12] + getitem_753: "f32[768][1]cuda:0" = _foreach_mul_2[13] + getitem_754: "f32[768][1]cuda:0" = _foreach_mul_2[14] + getitem_755: "f32[768][1]cuda:0" = _foreach_mul_2[15] + getitem_756: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[16] + getitem_757: "f32[2304][1]cuda:0" = _foreach_mul_2[17] + getitem_758: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[18] + getitem_759: "f32[768][1]cuda:0" = _foreach_mul_2[19] + getitem_760: "f32[768][1]cuda:0" = _foreach_mul_2[20] + getitem_761: "f32[768][1]cuda:0" = _foreach_mul_2[21] + getitem_762: 
"f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[22] + getitem_763: "f32[3072][1]cuda:0" = _foreach_mul_2[23] + getitem_764: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[24] + getitem_765: "f32[768][1]cuda:0" = _foreach_mul_2[25] + getitem_766: "f32[768][1]cuda:0" = _foreach_mul_2[26] + getitem_767: "f32[768][1]cuda:0" = _foreach_mul_2[27] + getitem_768: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[28] + getitem_769: "f32[2304][1]cuda:0" = _foreach_mul_2[29] + getitem_770: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[30] + getitem_771: "f32[768][1]cuda:0" = _foreach_mul_2[31] + getitem_772: "f32[768][1]cuda:0" = _foreach_mul_2[32] + getitem_773: "f32[768][1]cuda:0" = _foreach_mul_2[33] + getitem_774: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[34] + getitem_775: "f32[3072][1]cuda:0" = _foreach_mul_2[35] + getitem_776: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[36] + getitem_777: "f32[768][1]cuda:0" = _foreach_mul_2[37] + getitem_778: "f32[768][1]cuda:0" = _foreach_mul_2[38] + getitem_779: "f32[768][1]cuda:0" = _foreach_mul_2[39] + getitem_780: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[40] + getitem_781: "f32[2304][1]cuda:0" = _foreach_mul_2[41] + getitem_782: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[42] + getitem_783: "f32[768][1]cuda:0" = _foreach_mul_2[43] + getitem_784: "f32[768][1]cuda:0" = _foreach_mul_2[44] + getitem_785: "f32[768][1]cuda:0" = _foreach_mul_2[45] + getitem_786: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[46] + getitem_787: "f32[3072][1]cuda:0" = _foreach_mul_2[47] + getitem_788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[48] + getitem_789: "f32[768][1]cuda:0" = _foreach_mul_2[49] + getitem_790: "f32[768][1]cuda:0" = _foreach_mul_2[50] + getitem_791: "f32[768][1]cuda:0" = _foreach_mul_2[51] + getitem_792: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[52] + getitem_793: "f32[2304][1]cuda:0" = _foreach_mul_2[53] + getitem_794: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[54] + getitem_795: "f32[768][1]cuda:0" = _foreach_mul_2[55] + getitem_796: "f32[768][1]cuda:0" = _foreach_mul_2[56] + getitem_797: "f32[768][1]cuda:0" = _foreach_mul_2[57] + getitem_798: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[58] + getitem_799: "f32[3072][1]cuda:0" = _foreach_mul_2[59] + getitem_800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[60] + getitem_801: "f32[768][1]cuda:0" = _foreach_mul_2[61] + getitem_802: "f32[768][1]cuda:0" = _foreach_mul_2[62] + getitem_803: "f32[768][1]cuda:0" = _foreach_mul_2[63] + getitem_804: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[64] + getitem_805: "f32[2304][1]cuda:0" = _foreach_mul_2[65] + getitem_806: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[66] + getitem_807: "f32[768][1]cuda:0" = _foreach_mul_2[67] + getitem_808: "f32[768][1]cuda:0" = _foreach_mul_2[68] + getitem_809: "f32[768][1]cuda:0" = _foreach_mul_2[69] + getitem_810: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[70] + getitem_811: "f32[3072][1]cuda:0" = _foreach_mul_2[71] + getitem_812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[72] + getitem_813: "f32[768][1]cuda:0" = _foreach_mul_2[73] + getitem_814: "f32[768][1]cuda:0" = _foreach_mul_2[74] + getitem_815: "f32[768][1]cuda:0" = _foreach_mul_2[75] + getitem_816: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[76] + getitem_817: "f32[2304][1]cuda:0" = _foreach_mul_2[77] + getitem_818: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[78] + getitem_819: "f32[768][1]cuda:0" = _foreach_mul_2[79] + getitem_820: "f32[768][1]cuda:0" = _foreach_mul_2[80] + getitem_821: "f32[768][1]cuda:0" 
= _foreach_mul_2[81] + getitem_822: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[82] + getitem_823: "f32[3072][1]cuda:0" = _foreach_mul_2[83] + getitem_824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[84] + getitem_825: "f32[768][1]cuda:0" = _foreach_mul_2[85] + getitem_826: "f32[768][1]cuda:0" = _foreach_mul_2[86] + getitem_827: "f32[768][1]cuda:0" = _foreach_mul_2[87] + getitem_828: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[88] + getitem_829: "f32[2304][1]cuda:0" = _foreach_mul_2[89] + getitem_830: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[90] + getitem_831: "f32[768][1]cuda:0" = _foreach_mul_2[91] + getitem_832: "f32[768][1]cuda:0" = _foreach_mul_2[92] + getitem_833: "f32[768][1]cuda:0" = _foreach_mul_2[93] + getitem_834: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[94] + getitem_835: "f32[3072][1]cuda:0" = _foreach_mul_2[95] + getitem_836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[96] + getitem_837: "f32[768][1]cuda:0" = _foreach_mul_2[97] + getitem_838: "f32[768][1]cuda:0" = _foreach_mul_2[98] + getitem_839: "f32[768][1]cuda:0" = _foreach_mul_2[99] + getitem_840: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[100] + getitem_841: "f32[2304][1]cuda:0" = _foreach_mul_2[101] + getitem_842: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[102] + getitem_843: "f32[768][1]cuda:0" = _foreach_mul_2[103] + getitem_844: "f32[768][1]cuda:0" = _foreach_mul_2[104] + getitem_845: "f32[768][1]cuda:0" = _foreach_mul_2[105] + getitem_846: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[106] + getitem_847: "f32[3072][1]cuda:0" = _foreach_mul_2[107] + getitem_848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[108] + getitem_849: "f32[768][1]cuda:0" = _foreach_mul_2[109] + getitem_850: "f32[768][1]cuda:0" = _foreach_mul_2[110] + getitem_851: "f32[768][1]cuda:0" = _foreach_mul_2[111] + getitem_852: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[112] + getitem_853: "f32[2304][1]cuda:0" = _foreach_mul_2[113] + getitem_854: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[114] + getitem_855: "f32[768][1]cuda:0" = _foreach_mul_2[115] + getitem_856: "f32[768][1]cuda:0" = _foreach_mul_2[116] + getitem_857: "f32[768][1]cuda:0" = _foreach_mul_2[117] + getitem_858: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[118] + getitem_859: "f32[3072][1]cuda:0" = _foreach_mul_2[119] + getitem_860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[120] + getitem_861: "f32[768][1]cuda:0" = _foreach_mul_2[121] + getitem_862: "f32[768][1]cuda:0" = _foreach_mul_2[122] + getitem_863: "f32[768][1]cuda:0" = _foreach_mul_2[123] + getitem_864: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[124] + getitem_865: "f32[2304][1]cuda:0" = _foreach_mul_2[125] + getitem_866: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[126] + getitem_867: "f32[768][1]cuda:0" = _foreach_mul_2[127] + getitem_868: "f32[768][1]cuda:0" = _foreach_mul_2[128] + getitem_869: "f32[768][1]cuda:0" = _foreach_mul_2[129] + getitem_870: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[130] + getitem_871: "f32[3072][1]cuda:0" = _foreach_mul_2[131] + getitem_872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[132] + getitem_873: "f32[768][1]cuda:0" = _foreach_mul_2[133] + getitem_874: "f32[768][1]cuda:0" = _foreach_mul_2[134] + getitem_875: "f32[768][1]cuda:0" = _foreach_mul_2[135] + getitem_876: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[136] + getitem_877: "f32[2304][1]cuda:0" = _foreach_mul_2[137] + getitem_878: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[138] + getitem_879: "f32[768][1]cuda:0" = _foreach_mul_2[139] + getitem_880: 
"f32[768][1]cuda:0" = _foreach_mul_2[140] + getitem_881: "f32[768][1]cuda:0" = _foreach_mul_2[141] + getitem_882: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[142] + getitem_883: "f32[3072][1]cuda:0" = _foreach_mul_2[143] + getitem_884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[144] + getitem_885: "f32[768][1]cuda:0" = _foreach_mul_2[145] + getitem_886: "f32[768][1]cuda:0" = _foreach_mul_2[146] + getitem_887: "f32[768][1]cuda:0" = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, 
getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = 
getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_2[0] + getitem_889: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_2[1] + getitem_890: "f32[768][1]cuda:0" = _foreach_add_2[2] + getitem_891: "f32[768][1]cuda:0" = _foreach_add_2[3] + getitem_892: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[4] + getitem_893: "f32[2304][1]cuda:0" = _foreach_add_2[5] + getitem_894: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[6] + getitem_895: "f32[768][1]cuda:0" = _foreach_add_2[7] + getitem_896: "f32[768][1]cuda:0" = _foreach_add_2[8] + getitem_897: "f32[768][1]cuda:0" = _foreach_add_2[9] + getitem_898: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[10] + getitem_899: "f32[3072][1]cuda:0" = _foreach_add_2[11] + getitem_900: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[12] + getitem_901: "f32[768][1]cuda:0" = _foreach_add_2[13] + getitem_902: "f32[768][1]cuda:0" = _foreach_add_2[14] + getitem_903: "f32[768][1]cuda:0" = _foreach_add_2[15] + getitem_904: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[16] + getitem_905: "f32[2304][1]cuda:0" = _foreach_add_2[17] + getitem_906: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[18] + getitem_907: "f32[768][1]cuda:0" = _foreach_add_2[19] + getitem_908: "f32[768][1]cuda:0" = _foreach_add_2[20] + getitem_909: "f32[768][1]cuda:0" = _foreach_add_2[21] + getitem_910: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[22] + getitem_911: "f32[3072][1]cuda:0" = _foreach_add_2[23] + getitem_912: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[24] + getitem_913: "f32[768][1]cuda:0" = _foreach_add_2[25] + getitem_914: "f32[768][1]cuda:0" = _foreach_add_2[26] + getitem_915: "f32[768][1]cuda:0" = _foreach_add_2[27] + getitem_916: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[28] + getitem_917: "f32[2304][1]cuda:0" = _foreach_add_2[29] + getitem_918: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[30] + getitem_919: "f32[768][1]cuda:0" = _foreach_add_2[31] + getitem_920: "f32[768][1]cuda:0" = _foreach_add_2[32] + getitem_921: "f32[768][1]cuda:0" = _foreach_add_2[33] + getitem_922: "f32[3072, 768][768, 1]cuda:0" = 
_foreach_add_2[34] + getitem_923: "f32[3072][1]cuda:0" = _foreach_add_2[35] + getitem_924: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[36] + getitem_925: "f32[768][1]cuda:0" = _foreach_add_2[37] + getitem_926: "f32[768][1]cuda:0" = _foreach_add_2[38] + getitem_927: "f32[768][1]cuda:0" = _foreach_add_2[39] + getitem_928: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[40] + getitem_929: "f32[2304][1]cuda:0" = _foreach_add_2[41] + getitem_930: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[42] + getitem_931: "f32[768][1]cuda:0" = _foreach_add_2[43] + getitem_932: "f32[768][1]cuda:0" = _foreach_add_2[44] + getitem_933: "f32[768][1]cuda:0" = _foreach_add_2[45] + getitem_934: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[46] + getitem_935: "f32[3072][1]cuda:0" = _foreach_add_2[47] + getitem_936: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[48] + getitem_937: "f32[768][1]cuda:0" = _foreach_add_2[49] + getitem_938: "f32[768][1]cuda:0" = _foreach_add_2[50] + getitem_939: "f32[768][1]cuda:0" = _foreach_add_2[51] + getitem_940: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[52] + getitem_941: "f32[2304][1]cuda:0" = _foreach_add_2[53] + getitem_942: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[54] + getitem_943: "f32[768][1]cuda:0" = _foreach_add_2[55] + getitem_944: "f32[768][1]cuda:0" = _foreach_add_2[56] + getitem_945: "f32[768][1]cuda:0" = _foreach_add_2[57] + getitem_946: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[58] + getitem_947: "f32[3072][1]cuda:0" = _foreach_add_2[59] + getitem_948: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[60] + getitem_949: "f32[768][1]cuda:0" = _foreach_add_2[61] + getitem_950: "f32[768][1]cuda:0" = _foreach_add_2[62] + getitem_951: "f32[768][1]cuda:0" = _foreach_add_2[63] + getitem_952: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[64] + getitem_953: "f32[2304][1]cuda:0" = _foreach_add_2[65] + getitem_954: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[66] + getitem_955: "f32[768][1]cuda:0" = _foreach_add_2[67] + getitem_956: "f32[768][1]cuda:0" = _foreach_add_2[68] + getitem_957: "f32[768][1]cuda:0" = _foreach_add_2[69] + getitem_958: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[70] + getitem_959: "f32[3072][1]cuda:0" = _foreach_add_2[71] + getitem_960: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[72] + getitem_961: "f32[768][1]cuda:0" = _foreach_add_2[73] + getitem_962: "f32[768][1]cuda:0" = _foreach_add_2[74] + getitem_963: "f32[768][1]cuda:0" = _foreach_add_2[75] + getitem_964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[76] + getitem_965: "f32[2304][1]cuda:0" = _foreach_add_2[77] + getitem_966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[78] + getitem_967: "f32[768][1]cuda:0" = _foreach_add_2[79] + getitem_968: "f32[768][1]cuda:0" = _foreach_add_2[80] + getitem_969: "f32[768][1]cuda:0" = _foreach_add_2[81] + getitem_970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[82] + getitem_971: "f32[3072][1]cuda:0" = _foreach_add_2[83] + getitem_972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[84] + getitem_973: "f32[768][1]cuda:0" = _foreach_add_2[85] + getitem_974: "f32[768][1]cuda:0" = _foreach_add_2[86] + getitem_975: "f32[768][1]cuda:0" = _foreach_add_2[87] + getitem_976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[88] + getitem_977: "f32[2304][1]cuda:0" = _foreach_add_2[89] + getitem_978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[90] + getitem_979: "f32[768][1]cuda:0" = _foreach_add_2[91] + getitem_980: "f32[768][1]cuda:0" = _foreach_add_2[92] + getitem_981: "f32[768][1]cuda:0" = _foreach_add_2[93] + 
getitem_982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[94] + getitem_983: "f32[3072][1]cuda:0" = _foreach_add_2[95] + getitem_984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[96] + getitem_985: "f32[768][1]cuda:0" = _foreach_add_2[97] + getitem_986: "f32[768][1]cuda:0" = _foreach_add_2[98] + getitem_987: "f32[768][1]cuda:0" = _foreach_add_2[99] + getitem_988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[100] + getitem_989: "f32[2304][1]cuda:0" = _foreach_add_2[101] + getitem_990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[102] + getitem_991: "f32[768][1]cuda:0" = _foreach_add_2[103] + getitem_992: "f32[768][1]cuda:0" = _foreach_add_2[104] + getitem_993: "f32[768][1]cuda:0" = _foreach_add_2[105] + getitem_994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[106] + getitem_995: "f32[3072][1]cuda:0" = _foreach_add_2[107] + getitem_996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[108] + getitem_997: "f32[768][1]cuda:0" = _foreach_add_2[109] + getitem_998: "f32[768][1]cuda:0" = _foreach_add_2[110] + getitem_999: "f32[768][1]cuda:0" = _foreach_add_2[111] + getitem_1000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[112] + getitem_1001: "f32[2304][1]cuda:0" = _foreach_add_2[113] + getitem_1002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[114] + getitem_1003: "f32[768][1]cuda:0" = _foreach_add_2[115] + getitem_1004: "f32[768][1]cuda:0" = _foreach_add_2[116] + getitem_1005: "f32[768][1]cuda:0" = _foreach_add_2[117] + getitem_1006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[118] + getitem_1007: "f32[3072][1]cuda:0" = _foreach_add_2[119] + getitem_1008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[120] + getitem_1009: "f32[768][1]cuda:0" = _foreach_add_2[121] + getitem_1010: "f32[768][1]cuda:0" = _foreach_add_2[122] + getitem_1011: "f32[768][1]cuda:0" = _foreach_add_2[123] + getitem_1012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[124] + getitem_1013: "f32[2304][1]cuda:0" = _foreach_add_2[125] + getitem_1014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[126] + getitem_1015: "f32[768][1]cuda:0" = _foreach_add_2[127] + getitem_1016: "f32[768][1]cuda:0" = _foreach_add_2[128] + getitem_1017: "f32[768][1]cuda:0" = _foreach_add_2[129] + getitem_1018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[130] + getitem_1019: "f32[3072][1]cuda:0" = _foreach_add_2[131] + getitem_1020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[132] + getitem_1021: "f32[768][1]cuda:0" = _foreach_add_2[133] + getitem_1022: "f32[768][1]cuda:0" = _foreach_add_2[134] + getitem_1023: "f32[768][1]cuda:0" = _foreach_add_2[135] + getitem_1024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[136] + getitem_1025: "f32[2304][1]cuda:0" = _foreach_add_2[137] + getitem_1026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[138] + getitem_1027: "f32[768][1]cuda:0" = _foreach_add_2[139] + getitem_1028: "f32[768][1]cuda:0" = _foreach_add_2[140] + getitem_1029: "f32[768][1]cuda:0" = _foreach_add_2[141] + getitem_1030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[142] + getitem_1031: "f32[3072][1]cuda:0" = _foreach_add_2[143] + getitem_1032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[144] + getitem_1033: "f32[768][1]cuda:0" = _foreach_add_2[145] + getitem_1034: "f32[768][1]cuda:0" = _foreach_add_2[146] + getitem_1035: "f32[768][1]cuda:0" = _foreach_add_2[147]; _foreach_add_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = 
torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036: "f32[][]cuda:0" = _foreach_pow[0] + getitem_1037: "f32[][]cuda:0" = _foreach_pow[1] + getitem_1038: "f32[][]cuda:0" = _foreach_pow[2] + getitem_1039: "f32[][]cuda:0" = _foreach_pow[3] + getitem_1040: "f32[][]cuda:0" = _foreach_pow[4] + getitem_1041: "f32[][]cuda:0" = _foreach_pow[5] + getitem_1042: "f32[][]cuda:0" = _foreach_pow[6] + getitem_1043: "f32[][]cuda:0" = _foreach_pow[7] + getitem_1044: "f32[][]cuda:0" = _foreach_pow[8] + getitem_1045: "f32[][]cuda:0" = _foreach_pow[9] + getitem_1046: "f32[][]cuda:0" = _foreach_pow[10] + getitem_1047: "f32[][]cuda:0" = _foreach_pow[11] + getitem_1048: "f32[][]cuda:0" = _foreach_pow[12] + getitem_1049: "f32[][]cuda:0" = _foreach_pow[13] + getitem_1050: "f32[][]cuda:0" = _foreach_pow[14] + getitem_1051: "f32[][]cuda:0" = _foreach_pow[15] + getitem_1052: "f32[][]cuda:0" = _foreach_pow[16] + getitem_1053: "f32[][]cuda:0" = _foreach_pow[17] + getitem_1054: "f32[][]cuda:0" = _foreach_pow[18] + getitem_1055: "f32[][]cuda:0" = _foreach_pow[19] + getitem_1056: "f32[][]cuda:0" = _foreach_pow[20] + getitem_1057: "f32[][]cuda:0" = _foreach_pow[21] + getitem_1058: "f32[][]cuda:0" = _foreach_pow[22] + getitem_1059: "f32[][]cuda:0" = _foreach_pow[23] + getitem_1060: "f32[][]cuda:0" = _foreach_pow[24] + getitem_1061: "f32[][]cuda:0" = _foreach_pow[25] + getitem_1062: "f32[][]cuda:0" = _foreach_pow[26] + getitem_1063: "f32[][]cuda:0" = _foreach_pow[27] + getitem_1064: "f32[][]cuda:0" = _foreach_pow[28] + getitem_1065: "f32[][]cuda:0" = _foreach_pow[29] + getitem_1066: "f32[][]cuda:0" = _foreach_pow[30] + getitem_1067: "f32[][]cuda:0" = _foreach_pow[31] + getitem_1068: "f32[][]cuda:0" = _foreach_pow[32] + getitem_1069: 
"f32[][]cuda:0" = _foreach_pow[33] + getitem_1070: "f32[][]cuda:0" = _foreach_pow[34] + getitem_1071: "f32[][]cuda:0" = _foreach_pow[35] + getitem_1072: "f32[][]cuda:0" = _foreach_pow[36] + getitem_1073: "f32[][]cuda:0" = _foreach_pow[37] + getitem_1074: "f32[][]cuda:0" = _foreach_pow[38] + getitem_1075: "f32[][]cuda:0" = _foreach_pow[39] + getitem_1076: "f32[][]cuda:0" = _foreach_pow[40] + getitem_1077: "f32[][]cuda:0" = _foreach_pow[41] + getitem_1078: "f32[][]cuda:0" = _foreach_pow[42] + getitem_1079: "f32[][]cuda:0" = _foreach_pow[43] + getitem_1080: "f32[][]cuda:0" = _foreach_pow[44] + getitem_1081: "f32[][]cuda:0" = _foreach_pow[45] + getitem_1082: "f32[][]cuda:0" = _foreach_pow[46] + getitem_1083: "f32[][]cuda:0" = _foreach_pow[47] + getitem_1084: "f32[][]cuda:0" = _foreach_pow[48] + getitem_1085: "f32[][]cuda:0" = _foreach_pow[49] + getitem_1086: "f32[][]cuda:0" = _foreach_pow[50] + getitem_1087: "f32[][]cuda:0" = _foreach_pow[51] + getitem_1088: "f32[][]cuda:0" = _foreach_pow[52] + getitem_1089: "f32[][]cuda:0" = _foreach_pow[53] + getitem_1090: "f32[][]cuda:0" = _foreach_pow[54] + getitem_1091: "f32[][]cuda:0" = _foreach_pow[55] + getitem_1092: "f32[][]cuda:0" = _foreach_pow[56] + getitem_1093: "f32[][]cuda:0" = _foreach_pow[57] + getitem_1094: "f32[][]cuda:0" = _foreach_pow[58] + getitem_1095: "f32[][]cuda:0" = _foreach_pow[59] + getitem_1096: "f32[][]cuda:0" = _foreach_pow[60] + getitem_1097: "f32[][]cuda:0" = _foreach_pow[61] + getitem_1098: "f32[][]cuda:0" = _foreach_pow[62] + getitem_1099: "f32[][]cuda:0" = _foreach_pow[63] + getitem_1100: "f32[][]cuda:0" = _foreach_pow[64] + getitem_1101: "f32[][]cuda:0" = _foreach_pow[65] + getitem_1102: "f32[][]cuda:0" = _foreach_pow[66] + getitem_1103: "f32[][]cuda:0" = _foreach_pow[67] + getitem_1104: "f32[][]cuda:0" = _foreach_pow[68] + getitem_1105: "f32[][]cuda:0" = _foreach_pow[69] + getitem_1106: "f32[][]cuda:0" = _foreach_pow[70] + getitem_1107: "f32[][]cuda:0" = _foreach_pow[71] + getitem_1108: "f32[][]cuda:0" = _foreach_pow[72] + getitem_1109: "f32[][]cuda:0" = _foreach_pow[73] + getitem_1110: "f32[][]cuda:0" = _foreach_pow[74] + getitem_1111: "f32[][]cuda:0" = _foreach_pow[75] + getitem_1112: "f32[][]cuda:0" = _foreach_pow[76] + getitem_1113: "f32[][]cuda:0" = _foreach_pow[77] + getitem_1114: "f32[][]cuda:0" = _foreach_pow[78] + getitem_1115: "f32[][]cuda:0" = _foreach_pow[79] + getitem_1116: "f32[][]cuda:0" = _foreach_pow[80] + getitem_1117: "f32[][]cuda:0" = _foreach_pow[81] + getitem_1118: "f32[][]cuda:0" = _foreach_pow[82] + getitem_1119: "f32[][]cuda:0" = _foreach_pow[83] + getitem_1120: "f32[][]cuda:0" = _foreach_pow[84] + getitem_1121: "f32[][]cuda:0" = _foreach_pow[85] + getitem_1122: "f32[][]cuda:0" = _foreach_pow[86] + getitem_1123: "f32[][]cuda:0" = _foreach_pow[87] + getitem_1124: "f32[][]cuda:0" = _foreach_pow[88] + getitem_1125: "f32[][]cuda:0" = _foreach_pow[89] + getitem_1126: "f32[][]cuda:0" = _foreach_pow[90] + getitem_1127: "f32[][]cuda:0" = _foreach_pow[91] + getitem_1128: "f32[][]cuda:0" = _foreach_pow[92] + getitem_1129: "f32[][]cuda:0" = _foreach_pow[93] + getitem_1130: "f32[][]cuda:0" = _foreach_pow[94] + getitem_1131: "f32[][]cuda:0" = _foreach_pow[95] + getitem_1132: "f32[][]cuda:0" = _foreach_pow[96] + getitem_1133: "f32[][]cuda:0" = _foreach_pow[97] + getitem_1134: "f32[][]cuda:0" = _foreach_pow[98] + getitem_1135: "f32[][]cuda:0" = _foreach_pow[99] + getitem_1136: "f32[][]cuda:0" = _foreach_pow[100] + getitem_1137: "f32[][]cuda:0" = _foreach_pow[101] + getitem_1138: "f32[][]cuda:0" = 
_foreach_pow[102] + getitem_1139: "f32[][]cuda:0" = _foreach_pow[103] + getitem_1140: "f32[][]cuda:0" = _foreach_pow[104] + getitem_1141: "f32[][]cuda:0" = _foreach_pow[105] + getitem_1142: "f32[][]cuda:0" = _foreach_pow[106] + getitem_1143: "f32[][]cuda:0" = _foreach_pow[107] + getitem_1144: "f32[][]cuda:0" = _foreach_pow[108] + getitem_1145: "f32[][]cuda:0" = _foreach_pow[109] + getitem_1146: "f32[][]cuda:0" = _foreach_pow[110] + getitem_1147: "f32[][]cuda:0" = _foreach_pow[111] + getitem_1148: "f32[][]cuda:0" = _foreach_pow[112] + getitem_1149: "f32[][]cuda:0" = _foreach_pow[113] + getitem_1150: "f32[][]cuda:0" = _foreach_pow[114] + getitem_1151: "f32[][]cuda:0" = _foreach_pow[115] + getitem_1152: "f32[][]cuda:0" = _foreach_pow[116] + getitem_1153: "f32[][]cuda:0" = _foreach_pow[117] + getitem_1154: "f32[][]cuda:0" = _foreach_pow[118] + getitem_1155: "f32[][]cuda:0" = _foreach_pow[119] + getitem_1156: "f32[][]cuda:0" = _foreach_pow[120] + getitem_1157: "f32[][]cuda:0" = _foreach_pow[121] + getitem_1158: "f32[][]cuda:0" = _foreach_pow[122] + getitem_1159: "f32[][]cuda:0" = _foreach_pow[123] + getitem_1160: "f32[][]cuda:0" = _foreach_pow[124] + getitem_1161: "f32[][]cuda:0" = _foreach_pow[125] + getitem_1162: "f32[][]cuda:0" = _foreach_pow[126] + getitem_1163: "f32[][]cuda:0" = _foreach_pow[127] + getitem_1164: "f32[][]cuda:0" = _foreach_pow[128] + getitem_1165: "f32[][]cuda:0" = _foreach_pow[129] + getitem_1166: "f32[][]cuda:0" = _foreach_pow[130] + getitem_1167: "f32[][]cuda:0" = _foreach_pow[131] + getitem_1168: "f32[][]cuda:0" = _foreach_pow[132] + getitem_1169: "f32[][]cuda:0" = _foreach_pow[133] + getitem_1170: "f32[][]cuda:0" = _foreach_pow[134] + getitem_1171: "f32[][]cuda:0" = _foreach_pow[135] + getitem_1172: "f32[][]cuda:0" = _foreach_pow[136] + getitem_1173: "f32[][]cuda:0" = _foreach_pow[137] + getitem_1174: "f32[][]cuda:0" = _foreach_pow[138] + getitem_1175: "f32[][]cuda:0" = _foreach_pow[139] + getitem_1176: "f32[][]cuda:0" = _foreach_pow[140] + getitem_1177: "f32[][]cuda:0" = _foreach_pow[141] + getitem_1178: "f32[][]cuda:0" = _foreach_pow[142] + getitem_1179: "f32[][]cuda:0" = _foreach_pow[143] + getitem_1180: "f32[][]cuda:0" = _foreach_pow[144] + getitem_1181: "f32[][]cuda:0" = _foreach_pow[145] + getitem_1182: "f32[][]cuda:0" = _foreach_pow[146] + getitem_1183: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, 
getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1184: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_1185: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_1186: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_1187: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_1188: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_1189: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_1190: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_1191: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_1192: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_1193: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_1194: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_1195: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_1196: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_1197: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_1198: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_1199: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_1200: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_1201: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_1202: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_1203: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_1204: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_1205: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_1206: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_1207: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_1208: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_1209: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_1210: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_1211: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_1212: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_1213: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_1214: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_1215: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_1216: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_1217: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_1218: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_1219: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_1220: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_1221: "f32[][]cuda:0" = _foreach_pow_1[37] + getitem_1222: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_1223: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_1224: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_1225: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_1226: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_1227: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_1228: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_1229: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_1230: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_1231: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_1232: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_1233: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_1234: "f32[][]cuda:0" = 
_foreach_pow_1[50] + getitem_1235: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_1236: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_1237: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_1238: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_1239: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_1240: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_1241: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_1242: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_1243: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_1244: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_1245: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_1246: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_1247: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_1248: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_1249: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_1250: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_1251: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_1252: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_1253: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_1254: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_1255: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_1256: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_1257: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_1258: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_1259: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_1260: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_1261: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_1262: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_1263: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_1264: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_1265: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_1266: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_1267: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_1268: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_1269: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_1270: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_1271: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_1272: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_1273: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_1274: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_1275: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_1276: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_1277: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_1278: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_1279: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_1280: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_1281: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_1282: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_1283: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_1284: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_1285: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_1286: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_1287: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_1288: "f32[][]cuda:0" = _foreach_pow_1[104] + getitem_1289: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_1290: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_1291: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_1292: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_1293: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_1294: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_1295: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_1296: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_1297: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_1298: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_1299: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_1300: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_1301: 
"f32[][]cuda:0" = _foreach_pow_1[117] + getitem_1302: "f32[][]cuda:0" = _foreach_pow_1[118] + getitem_1303: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_1304: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_1305: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_1306: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_1307: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_1308: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_1309: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_1310: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_1311: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_1312: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_1313: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_1314: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_1315: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_1316: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_1317: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_1318: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_1319: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_1320: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_1321: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_1322: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_1323: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_1324: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_1325: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_1326: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_1327: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_1328: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_1329: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_1330: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_1331: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, 
getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332: "f32[][]cuda:0" = _foreach_sub_1[0] + getitem_1333: "f32[][]cuda:0" = _foreach_sub_1[1] + getitem_1334: "f32[][]cuda:0" = _foreach_sub_1[2] + getitem_1335: "f32[][]cuda:0" = _foreach_sub_1[3] + getitem_1336: "f32[][]cuda:0" = _foreach_sub_1[4] + getitem_1337: "f32[][]cuda:0" = _foreach_sub_1[5] + getitem_1338: "f32[][]cuda:0" = _foreach_sub_1[6] + getitem_1339: "f32[][]cuda:0" = _foreach_sub_1[7] + getitem_1340: "f32[][]cuda:0" = _foreach_sub_1[8] + getitem_1341: "f32[][]cuda:0" = _foreach_sub_1[9] + getitem_1342: "f32[][]cuda:0" = _foreach_sub_1[10] + getitem_1343: "f32[][]cuda:0" = _foreach_sub_1[11] + getitem_1344: "f32[][]cuda:0" = _foreach_sub_1[12] + getitem_1345: "f32[][]cuda:0" = _foreach_sub_1[13] + getitem_1346: "f32[][]cuda:0" = _foreach_sub_1[14] + getitem_1347: "f32[][]cuda:0" = _foreach_sub_1[15] + getitem_1348: "f32[][]cuda:0" = _foreach_sub_1[16] + getitem_1349: "f32[][]cuda:0" = 
_foreach_sub_1[17] + getitem_1350: "f32[][]cuda:0" = _foreach_sub_1[18] + getitem_1351: "f32[][]cuda:0" = _foreach_sub_1[19] + getitem_1352: "f32[][]cuda:0" = _foreach_sub_1[20] + getitem_1353: "f32[][]cuda:0" = _foreach_sub_1[21] + getitem_1354: "f32[][]cuda:0" = _foreach_sub_1[22] + getitem_1355: "f32[][]cuda:0" = _foreach_sub_1[23] + getitem_1356: "f32[][]cuda:0" = _foreach_sub_1[24] + getitem_1357: "f32[][]cuda:0" = _foreach_sub_1[25] + getitem_1358: "f32[][]cuda:0" = _foreach_sub_1[26] + getitem_1359: "f32[][]cuda:0" = _foreach_sub_1[27] + getitem_1360: "f32[][]cuda:0" = _foreach_sub_1[28] + getitem_1361: "f32[][]cuda:0" = _foreach_sub_1[29] + getitem_1362: "f32[][]cuda:0" = _foreach_sub_1[30] + getitem_1363: "f32[][]cuda:0" = _foreach_sub_1[31] + getitem_1364: "f32[][]cuda:0" = _foreach_sub_1[32] + getitem_1365: "f32[][]cuda:0" = _foreach_sub_1[33] + getitem_1366: "f32[][]cuda:0" = _foreach_sub_1[34] + getitem_1367: "f32[][]cuda:0" = _foreach_sub_1[35] + getitem_1368: "f32[][]cuda:0" = _foreach_sub_1[36] + getitem_1369: "f32[][]cuda:0" = _foreach_sub_1[37] + getitem_1370: "f32[][]cuda:0" = _foreach_sub_1[38] + getitem_1371: "f32[][]cuda:0" = _foreach_sub_1[39] + getitem_1372: "f32[][]cuda:0" = _foreach_sub_1[40] + getitem_1373: "f32[][]cuda:0" = _foreach_sub_1[41] + getitem_1374: "f32[][]cuda:0" = _foreach_sub_1[42] + getitem_1375: "f32[][]cuda:0" = _foreach_sub_1[43] + getitem_1376: "f32[][]cuda:0" = _foreach_sub_1[44] + getitem_1377: "f32[][]cuda:0" = _foreach_sub_1[45] + getitem_1378: "f32[][]cuda:0" = _foreach_sub_1[46] + getitem_1379: "f32[][]cuda:0" = _foreach_sub_1[47] + getitem_1380: "f32[][]cuda:0" = _foreach_sub_1[48] + getitem_1381: "f32[][]cuda:0" = _foreach_sub_1[49] + getitem_1382: "f32[][]cuda:0" = _foreach_sub_1[50] + getitem_1383: "f32[][]cuda:0" = _foreach_sub_1[51] + getitem_1384: "f32[][]cuda:0" = _foreach_sub_1[52] + getitem_1385: "f32[][]cuda:0" = _foreach_sub_1[53] + getitem_1386: "f32[][]cuda:0" = _foreach_sub_1[54] + getitem_1387: "f32[][]cuda:0" = _foreach_sub_1[55] + getitem_1388: "f32[][]cuda:0" = _foreach_sub_1[56] + getitem_1389: "f32[][]cuda:0" = _foreach_sub_1[57] + getitem_1390: "f32[][]cuda:0" = _foreach_sub_1[58] + getitem_1391: "f32[][]cuda:0" = _foreach_sub_1[59] + getitem_1392: "f32[][]cuda:0" = _foreach_sub_1[60] + getitem_1393: "f32[][]cuda:0" = _foreach_sub_1[61] + getitem_1394: "f32[][]cuda:0" = _foreach_sub_1[62] + getitem_1395: "f32[][]cuda:0" = _foreach_sub_1[63] + getitem_1396: "f32[][]cuda:0" = _foreach_sub_1[64] + getitem_1397: "f32[][]cuda:0" = _foreach_sub_1[65] + getitem_1398: "f32[][]cuda:0" = _foreach_sub_1[66] + getitem_1399: "f32[][]cuda:0" = _foreach_sub_1[67] + getitem_1400: "f32[][]cuda:0" = _foreach_sub_1[68] + getitem_1401: "f32[][]cuda:0" = _foreach_sub_1[69] + getitem_1402: "f32[][]cuda:0" = _foreach_sub_1[70] + getitem_1403: "f32[][]cuda:0" = _foreach_sub_1[71] + getitem_1404: "f32[][]cuda:0" = _foreach_sub_1[72] + getitem_1405: "f32[][]cuda:0" = _foreach_sub_1[73] + getitem_1406: "f32[][]cuda:0" = _foreach_sub_1[74] + getitem_1407: "f32[][]cuda:0" = _foreach_sub_1[75] + getitem_1408: "f32[][]cuda:0" = _foreach_sub_1[76] + getitem_1409: "f32[][]cuda:0" = _foreach_sub_1[77] + getitem_1410: "f32[][]cuda:0" = _foreach_sub_1[78] + getitem_1411: "f32[][]cuda:0" = _foreach_sub_1[79] + getitem_1412: "f32[][]cuda:0" = _foreach_sub_1[80] + getitem_1413: "f32[][]cuda:0" = _foreach_sub_1[81] + getitem_1414: "f32[][]cuda:0" = _foreach_sub_1[82] + getitem_1415: "f32[][]cuda:0" = _foreach_sub_1[83] + getitem_1416: "f32[][]cuda:0" = 
_foreach_sub_1[84] + getitem_1417: "f32[][]cuda:0" = _foreach_sub_1[85] + getitem_1418: "f32[][]cuda:0" = _foreach_sub_1[86] + getitem_1419: "f32[][]cuda:0" = _foreach_sub_1[87] + getitem_1420: "f32[][]cuda:0" = _foreach_sub_1[88] + getitem_1421: "f32[][]cuda:0" = _foreach_sub_1[89] + getitem_1422: "f32[][]cuda:0" = _foreach_sub_1[90] + getitem_1423: "f32[][]cuda:0" = _foreach_sub_1[91] + getitem_1424: "f32[][]cuda:0" = _foreach_sub_1[92] + getitem_1425: "f32[][]cuda:0" = _foreach_sub_1[93] + getitem_1426: "f32[][]cuda:0" = _foreach_sub_1[94] + getitem_1427: "f32[][]cuda:0" = _foreach_sub_1[95] + getitem_1428: "f32[][]cuda:0" = _foreach_sub_1[96] + getitem_1429: "f32[][]cuda:0" = _foreach_sub_1[97] + getitem_1430: "f32[][]cuda:0" = _foreach_sub_1[98] + getitem_1431: "f32[][]cuda:0" = _foreach_sub_1[99] + getitem_1432: "f32[][]cuda:0" = _foreach_sub_1[100] + getitem_1433: "f32[][]cuda:0" = _foreach_sub_1[101] + getitem_1434: "f32[][]cuda:0" = _foreach_sub_1[102] + getitem_1435: "f32[][]cuda:0" = _foreach_sub_1[103] + getitem_1436: "f32[][]cuda:0" = _foreach_sub_1[104] + getitem_1437: "f32[][]cuda:0" = _foreach_sub_1[105] + getitem_1438: "f32[][]cuda:0" = _foreach_sub_1[106] + getitem_1439: "f32[][]cuda:0" = _foreach_sub_1[107] + getitem_1440: "f32[][]cuda:0" = _foreach_sub_1[108] + getitem_1441: "f32[][]cuda:0" = _foreach_sub_1[109] + getitem_1442: "f32[][]cuda:0" = _foreach_sub_1[110] + getitem_1443: "f32[][]cuda:0" = _foreach_sub_1[111] + getitem_1444: "f32[][]cuda:0" = _foreach_sub_1[112] + getitem_1445: "f32[][]cuda:0" = _foreach_sub_1[113] + getitem_1446: "f32[][]cuda:0" = _foreach_sub_1[114] + getitem_1447: "f32[][]cuda:0" = _foreach_sub_1[115] + getitem_1448: "f32[][]cuda:0" = _foreach_sub_1[116] + getitem_1449: "f32[][]cuda:0" = _foreach_sub_1[117] + getitem_1450: "f32[][]cuda:0" = _foreach_sub_1[118] + getitem_1451: "f32[][]cuda:0" = _foreach_sub_1[119] + getitem_1452: "f32[][]cuda:0" = _foreach_sub_1[120] + getitem_1453: "f32[][]cuda:0" = _foreach_sub_1[121] + getitem_1454: "f32[][]cuda:0" = _foreach_sub_1[122] + getitem_1455: "f32[][]cuda:0" = _foreach_sub_1[123] + getitem_1456: "f32[][]cuda:0" = _foreach_sub_1[124] + getitem_1457: "f32[][]cuda:0" = _foreach_sub_1[125] + getitem_1458: "f32[][]cuda:0" = _foreach_sub_1[126] + getitem_1459: "f32[][]cuda:0" = _foreach_sub_1[127] + getitem_1460: "f32[][]cuda:0" = _foreach_sub_1[128] + getitem_1461: "f32[][]cuda:0" = _foreach_sub_1[129] + getitem_1462: "f32[][]cuda:0" = _foreach_sub_1[130] + getitem_1463: "f32[][]cuda:0" = _foreach_sub_1[131] + getitem_1464: "f32[][]cuda:0" = _foreach_sub_1[132] + getitem_1465: "f32[][]cuda:0" = _foreach_sub_1[133] + getitem_1466: "f32[][]cuda:0" = _foreach_sub_1[134] + getitem_1467: "f32[][]cuda:0" = _foreach_sub_1[135] + getitem_1468: "f32[][]cuda:0" = _foreach_sub_1[136] + getitem_1469: "f32[][]cuda:0" = _foreach_sub_1[137] + getitem_1470: "f32[][]cuda:0" = _foreach_sub_1[138] + getitem_1471: "f32[][]cuda:0" = _foreach_sub_1[139] + getitem_1472: "f32[][]cuda:0" = _foreach_sub_1[140] + getitem_1473: "f32[][]cuda:0" = _foreach_sub_1[141] + getitem_1474: "f32[][]cuda:0" = _foreach_sub_1[142] + getitem_1475: "f32[][]cuda:0" = _foreach_sub_1[143] + getitem_1476: "f32[][]cuda:0" = _foreach_sub_1[144] + getitem_1477: "f32[][]cuda:0" = _foreach_sub_1[145] + getitem_1478: "f32[][]cuda:0" = _foreach_sub_1[146] + getitem_1479: "f32[][]cuda:0" = _foreach_sub_1[147]; _foreach_sub_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: 
torch._foreach_sub_(bias_correction2, 1) + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = 
getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480: "f32[][]cuda:0" = _foreach_sub_2[0] + getitem_1481: "f32[][]cuda:0" = _foreach_sub_2[1] + getitem_1482: "f32[][]cuda:0" = _foreach_sub_2[2] + getitem_1483: "f32[][]cuda:0" = _foreach_sub_2[3] + getitem_1484: "f32[][]cuda:0" = _foreach_sub_2[4] + getitem_1485: "f32[][]cuda:0" = _foreach_sub_2[5] + getitem_1486: "f32[][]cuda:0" = _foreach_sub_2[6] + getitem_1487: "f32[][]cuda:0" = _foreach_sub_2[7] + getitem_1488: "f32[][]cuda:0" = _foreach_sub_2[8] + getitem_1489: "f32[][]cuda:0" = _foreach_sub_2[9] + getitem_1490: "f32[][]cuda:0" = _foreach_sub_2[10] + getitem_1491: "f32[][]cuda:0" = _foreach_sub_2[11] + getitem_1492: "f32[][]cuda:0" = _foreach_sub_2[12] + getitem_1493: "f32[][]cuda:0" = _foreach_sub_2[13] + getitem_1494: "f32[][]cuda:0" = _foreach_sub_2[14] + getitem_1495: "f32[][]cuda:0" = _foreach_sub_2[15] + getitem_1496: "f32[][]cuda:0" = _foreach_sub_2[16] + getitem_1497: "f32[][]cuda:0" = _foreach_sub_2[17] + getitem_1498: "f32[][]cuda:0" = _foreach_sub_2[18] + getitem_1499: "f32[][]cuda:0" = _foreach_sub_2[19] + getitem_1500: "f32[][]cuda:0" = _foreach_sub_2[20] + getitem_1501: "f32[][]cuda:0" = _foreach_sub_2[21] + getitem_1502: "f32[][]cuda:0" = _foreach_sub_2[22] + getitem_1503: "f32[][]cuda:0" = _foreach_sub_2[23] + getitem_1504: "f32[][]cuda:0" = _foreach_sub_2[24] + getitem_1505: "f32[][]cuda:0" = _foreach_sub_2[25] + getitem_1506: "f32[][]cuda:0" = _foreach_sub_2[26] + getitem_1507: "f32[][]cuda:0" = _foreach_sub_2[27] + getitem_1508: "f32[][]cuda:0" = _foreach_sub_2[28] + getitem_1509: "f32[][]cuda:0" = _foreach_sub_2[29] + getitem_1510: "f32[][]cuda:0" = _foreach_sub_2[30] + getitem_1511: "f32[][]cuda:0" = _foreach_sub_2[31] + getitem_1512: "f32[][]cuda:0" = _foreach_sub_2[32] + getitem_1513: "f32[][]cuda:0" = _foreach_sub_2[33] + getitem_1514: "f32[][]cuda:0" = _foreach_sub_2[34] + getitem_1515: "f32[][]cuda:0" = _foreach_sub_2[35] + getitem_1516: "f32[][]cuda:0" = _foreach_sub_2[36] + getitem_1517: "f32[][]cuda:0" = _foreach_sub_2[37] + getitem_1518: "f32[][]cuda:0" = _foreach_sub_2[38] + getitem_1519: "f32[][]cuda:0" = _foreach_sub_2[39] + getitem_1520: "f32[][]cuda:0" = _foreach_sub_2[40] + getitem_1521: "f32[][]cuda:0" = _foreach_sub_2[41] + getitem_1522: "f32[][]cuda:0" = _foreach_sub_2[42] + getitem_1523: "f32[][]cuda:0" = _foreach_sub_2[43] + getitem_1524: "f32[][]cuda:0" = _foreach_sub_2[44] + getitem_1525: "f32[][]cuda:0" = _foreach_sub_2[45] + getitem_1526: "f32[][]cuda:0" = _foreach_sub_2[46] + getitem_1527: "f32[][]cuda:0" = _foreach_sub_2[47] + getitem_1528: "f32[][]cuda:0" = _foreach_sub_2[48] + getitem_1529: "f32[][]cuda:0" = _foreach_sub_2[49] + getitem_1530: "f32[][]cuda:0" = _foreach_sub_2[50] + getitem_1531: 
"f32[][]cuda:0" = _foreach_sub_2[51] + getitem_1532: "f32[][]cuda:0" = _foreach_sub_2[52] + getitem_1533: "f32[][]cuda:0" = _foreach_sub_2[53] + getitem_1534: "f32[][]cuda:0" = _foreach_sub_2[54] + getitem_1535: "f32[][]cuda:0" = _foreach_sub_2[55] + getitem_1536: "f32[][]cuda:0" = _foreach_sub_2[56] + getitem_1537: "f32[][]cuda:0" = _foreach_sub_2[57] + getitem_1538: "f32[][]cuda:0" = _foreach_sub_2[58] + getitem_1539: "f32[][]cuda:0" = _foreach_sub_2[59] + getitem_1540: "f32[][]cuda:0" = _foreach_sub_2[60] + getitem_1541: "f32[][]cuda:0" = _foreach_sub_2[61] + getitem_1542: "f32[][]cuda:0" = _foreach_sub_2[62] + getitem_1543: "f32[][]cuda:0" = _foreach_sub_2[63] + getitem_1544: "f32[][]cuda:0" = _foreach_sub_2[64] + getitem_1545: "f32[][]cuda:0" = _foreach_sub_2[65] + getitem_1546: "f32[][]cuda:0" = _foreach_sub_2[66] + getitem_1547: "f32[][]cuda:0" = _foreach_sub_2[67] + getitem_1548: "f32[][]cuda:0" = _foreach_sub_2[68] + getitem_1549: "f32[][]cuda:0" = _foreach_sub_2[69] + getitem_1550: "f32[][]cuda:0" = _foreach_sub_2[70] + getitem_1551: "f32[][]cuda:0" = _foreach_sub_2[71] + getitem_1552: "f32[][]cuda:0" = _foreach_sub_2[72] + getitem_1553: "f32[][]cuda:0" = _foreach_sub_2[73] + getitem_1554: "f32[][]cuda:0" = _foreach_sub_2[74] + getitem_1555: "f32[][]cuda:0" = _foreach_sub_2[75] + getitem_1556: "f32[][]cuda:0" = _foreach_sub_2[76] + getitem_1557: "f32[][]cuda:0" = _foreach_sub_2[77] + getitem_1558: "f32[][]cuda:0" = _foreach_sub_2[78] + getitem_1559: "f32[][]cuda:0" = _foreach_sub_2[79] + getitem_1560: "f32[][]cuda:0" = _foreach_sub_2[80] + getitem_1561: "f32[][]cuda:0" = _foreach_sub_2[81] + getitem_1562: "f32[][]cuda:0" = _foreach_sub_2[82] + getitem_1563: "f32[][]cuda:0" = _foreach_sub_2[83] + getitem_1564: "f32[][]cuda:0" = _foreach_sub_2[84] + getitem_1565: "f32[][]cuda:0" = _foreach_sub_2[85] + getitem_1566: "f32[][]cuda:0" = _foreach_sub_2[86] + getitem_1567: "f32[][]cuda:0" = _foreach_sub_2[87] + getitem_1568: "f32[][]cuda:0" = _foreach_sub_2[88] + getitem_1569: "f32[][]cuda:0" = _foreach_sub_2[89] + getitem_1570: "f32[][]cuda:0" = _foreach_sub_2[90] + getitem_1571: "f32[][]cuda:0" = _foreach_sub_2[91] + getitem_1572: "f32[][]cuda:0" = _foreach_sub_2[92] + getitem_1573: "f32[][]cuda:0" = _foreach_sub_2[93] + getitem_1574: "f32[][]cuda:0" = _foreach_sub_2[94] + getitem_1575: "f32[][]cuda:0" = _foreach_sub_2[95] + getitem_1576: "f32[][]cuda:0" = _foreach_sub_2[96] + getitem_1577: "f32[][]cuda:0" = _foreach_sub_2[97] + getitem_1578: "f32[][]cuda:0" = _foreach_sub_2[98] + getitem_1579: "f32[][]cuda:0" = _foreach_sub_2[99] + getitem_1580: "f32[][]cuda:0" = _foreach_sub_2[100] + getitem_1581: "f32[][]cuda:0" = _foreach_sub_2[101] + getitem_1582: "f32[][]cuda:0" = _foreach_sub_2[102] + getitem_1583: "f32[][]cuda:0" = _foreach_sub_2[103] + getitem_1584: "f32[][]cuda:0" = _foreach_sub_2[104] + getitem_1585: "f32[][]cuda:0" = _foreach_sub_2[105] + getitem_1586: "f32[][]cuda:0" = _foreach_sub_2[106] + getitem_1587: "f32[][]cuda:0" = _foreach_sub_2[107] + getitem_1588: "f32[][]cuda:0" = _foreach_sub_2[108] + getitem_1589: "f32[][]cuda:0" = _foreach_sub_2[109] + getitem_1590: "f32[][]cuda:0" = _foreach_sub_2[110] + getitem_1591: "f32[][]cuda:0" = _foreach_sub_2[111] + getitem_1592: "f32[][]cuda:0" = _foreach_sub_2[112] + getitem_1593: "f32[][]cuda:0" = _foreach_sub_2[113] + getitem_1594: "f32[][]cuda:0" = _foreach_sub_2[114] + getitem_1595: "f32[][]cuda:0" = _foreach_sub_2[115] + getitem_1596: "f32[][]cuda:0" = _foreach_sub_2[116] + getitem_1597: "f32[][]cuda:0" = _foreach_sub_2[117] + 
getitem_1598: "f32[][]cuda:0" = _foreach_sub_2[118] + getitem_1599: "f32[][]cuda:0" = _foreach_sub_2[119] + getitem_1600: "f32[][]cuda:0" = _foreach_sub_2[120] + getitem_1601: "f32[][]cuda:0" = _foreach_sub_2[121] + getitem_1602: "f32[][]cuda:0" = _foreach_sub_2[122] + getitem_1603: "f32[][]cuda:0" = _foreach_sub_2[123] + getitem_1604: "f32[][]cuda:0" = _foreach_sub_2[124] + getitem_1605: "f32[][]cuda:0" = _foreach_sub_2[125] + getitem_1606: "f32[][]cuda:0" = _foreach_sub_2[126] + getitem_1607: "f32[][]cuda:0" = _foreach_sub_2[127] + getitem_1608: "f32[][]cuda:0" = _foreach_sub_2[128] + getitem_1609: "f32[][]cuda:0" = _foreach_sub_2[129] + getitem_1610: "f32[][]cuda:0" = _foreach_sub_2[130] + getitem_1611: "f32[][]cuda:0" = _foreach_sub_2[131] + getitem_1612: "f32[][]cuda:0" = _foreach_sub_2[132] + getitem_1613: "f32[][]cuda:0" = _foreach_sub_2[133] + getitem_1614: "f32[][]cuda:0" = _foreach_sub_2[134] + getitem_1615: "f32[][]cuda:0" = _foreach_sub_2[135] + getitem_1616: "f32[][]cuda:0" = _foreach_sub_2[136] + getitem_1617: "f32[][]cuda:0" = _foreach_sub_2[137] + getitem_1618: "f32[][]cuda:0" = _foreach_sub_2[138] + getitem_1619: "f32[][]cuda:0" = _foreach_sub_2[139] + getitem_1620: "f32[][]cuda:0" = _foreach_sub_2[140] + getitem_1621: "f32[][]cuda:0" = _foreach_sub_2[141] + getitem_1622: "f32[][]cuda:0" = _foreach_sub_2[142] + getitem_1623: "f32[][]cuda:0" = _foreach_sub_2[143] + getitem_1624: "f32[][]cuda:0" = _foreach_sub_2[144] + getitem_1625: "f32[][]cuda:0" = _foreach_sub_2[145] + getitem_1626: "f32[][]cuda:0" = _foreach_sub_2[146] + getitem_1627: "f32[][]cuda:0" = _foreach_sub_2[147]; _foreach_sub_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, 
getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628: "f32[][]cuda:0" = _foreach_neg[0] + getitem_1629: "f32[][]cuda:0" = _foreach_neg[1] + getitem_1630: "f32[][]cuda:0" = _foreach_neg[2] + getitem_1631: "f32[][]cuda:0" = _foreach_neg[3] + getitem_1632: "f32[][]cuda:0" = _foreach_neg[4] + getitem_1633: "f32[][]cuda:0" = _foreach_neg[5] + getitem_1634: "f32[][]cuda:0" = _foreach_neg[6] + getitem_1635: "f32[][]cuda:0" = _foreach_neg[7] + getitem_1636: "f32[][]cuda:0" = _foreach_neg[8] + getitem_1637: "f32[][]cuda:0" = _foreach_neg[9] + getitem_1638: "f32[][]cuda:0" = _foreach_neg[10] + getitem_1639: "f32[][]cuda:0" = _foreach_neg[11] + getitem_1640: "f32[][]cuda:0" = _foreach_neg[12] + getitem_1641: "f32[][]cuda:0" = _foreach_neg[13] + getitem_1642: "f32[][]cuda:0" = _foreach_neg[14] + getitem_1643: "f32[][]cuda:0" = _foreach_neg[15] + getitem_1644: "f32[][]cuda:0" = _foreach_neg[16] + getitem_1645: "f32[][]cuda:0" = _foreach_neg[17] + getitem_1646: "f32[][]cuda:0" = _foreach_neg[18] + getitem_1647: 
"f32[][]cuda:0" = _foreach_neg[19] + getitem_1648: "f32[][]cuda:0" = _foreach_neg[20] + getitem_1649: "f32[][]cuda:0" = _foreach_neg[21] + getitem_1650: "f32[][]cuda:0" = _foreach_neg[22] + getitem_1651: "f32[][]cuda:0" = _foreach_neg[23] + getitem_1652: "f32[][]cuda:0" = _foreach_neg[24] + getitem_1653: "f32[][]cuda:0" = _foreach_neg[25] + getitem_1654: "f32[][]cuda:0" = _foreach_neg[26] + getitem_1655: "f32[][]cuda:0" = _foreach_neg[27] + getitem_1656: "f32[][]cuda:0" = _foreach_neg[28] + getitem_1657: "f32[][]cuda:0" = _foreach_neg[29] + getitem_1658: "f32[][]cuda:0" = _foreach_neg[30] + getitem_1659: "f32[][]cuda:0" = _foreach_neg[31] + getitem_1660: "f32[][]cuda:0" = _foreach_neg[32] + getitem_1661: "f32[][]cuda:0" = _foreach_neg[33] + getitem_1662: "f32[][]cuda:0" = _foreach_neg[34] + getitem_1663: "f32[][]cuda:0" = _foreach_neg[35] + getitem_1664: "f32[][]cuda:0" = _foreach_neg[36] + getitem_1665: "f32[][]cuda:0" = _foreach_neg[37] + getitem_1666: "f32[][]cuda:0" = _foreach_neg[38] + getitem_1667: "f32[][]cuda:0" = _foreach_neg[39] + getitem_1668: "f32[][]cuda:0" = _foreach_neg[40] + getitem_1669: "f32[][]cuda:0" = _foreach_neg[41] + getitem_1670: "f32[][]cuda:0" = _foreach_neg[42] + getitem_1671: "f32[][]cuda:0" = _foreach_neg[43] + getitem_1672: "f32[][]cuda:0" = _foreach_neg[44] + getitem_1673: "f32[][]cuda:0" = _foreach_neg[45] + getitem_1674: "f32[][]cuda:0" = _foreach_neg[46] + getitem_1675: "f32[][]cuda:0" = _foreach_neg[47] + getitem_1676: "f32[][]cuda:0" = _foreach_neg[48] + getitem_1677: "f32[][]cuda:0" = _foreach_neg[49] + getitem_1678: "f32[][]cuda:0" = _foreach_neg[50] + getitem_1679: "f32[][]cuda:0" = _foreach_neg[51] + getitem_1680: "f32[][]cuda:0" = _foreach_neg[52] + getitem_1681: "f32[][]cuda:0" = _foreach_neg[53] + getitem_1682: "f32[][]cuda:0" = _foreach_neg[54] + getitem_1683: "f32[][]cuda:0" = _foreach_neg[55] + getitem_1684: "f32[][]cuda:0" = _foreach_neg[56] + getitem_1685: "f32[][]cuda:0" = _foreach_neg[57] + getitem_1686: "f32[][]cuda:0" = _foreach_neg[58] + getitem_1687: "f32[][]cuda:0" = _foreach_neg[59] + getitem_1688: "f32[][]cuda:0" = _foreach_neg[60] + getitem_1689: "f32[][]cuda:0" = _foreach_neg[61] + getitem_1690: "f32[][]cuda:0" = _foreach_neg[62] + getitem_1691: "f32[][]cuda:0" = _foreach_neg[63] + getitem_1692: "f32[][]cuda:0" = _foreach_neg[64] + getitem_1693: "f32[][]cuda:0" = _foreach_neg[65] + getitem_1694: "f32[][]cuda:0" = _foreach_neg[66] + getitem_1695: "f32[][]cuda:0" = _foreach_neg[67] + getitem_1696: "f32[][]cuda:0" = _foreach_neg[68] + getitem_1697: "f32[][]cuda:0" = _foreach_neg[69] + getitem_1698: "f32[][]cuda:0" = _foreach_neg[70] + getitem_1699: "f32[][]cuda:0" = _foreach_neg[71] + getitem_1700: "f32[][]cuda:0" = _foreach_neg[72] + getitem_1701: "f32[][]cuda:0" = _foreach_neg[73] + getitem_1702: "f32[][]cuda:0" = _foreach_neg[74] + getitem_1703: "f32[][]cuda:0" = _foreach_neg[75] + getitem_1704: "f32[][]cuda:0" = _foreach_neg[76] + getitem_1705: "f32[][]cuda:0" = _foreach_neg[77] + getitem_1706: "f32[][]cuda:0" = _foreach_neg[78] + getitem_1707: "f32[][]cuda:0" = _foreach_neg[79] + getitem_1708: "f32[][]cuda:0" = _foreach_neg[80] + getitem_1709: "f32[][]cuda:0" = _foreach_neg[81] + getitem_1710: "f32[][]cuda:0" = _foreach_neg[82] + getitem_1711: "f32[][]cuda:0" = _foreach_neg[83] + getitem_1712: "f32[][]cuda:0" = _foreach_neg[84] + getitem_1713: "f32[][]cuda:0" = _foreach_neg[85] + getitem_1714: "f32[][]cuda:0" = _foreach_neg[86] + getitem_1715: "f32[][]cuda:0" = _foreach_neg[87] + getitem_1716: "f32[][]cuda:0" = _foreach_neg[88] 
+ getitem_1717: "f32[][]cuda:0" = _foreach_neg[89] + getitem_1718: "f32[][]cuda:0" = _foreach_neg[90] + getitem_1719: "f32[][]cuda:0" = _foreach_neg[91] + getitem_1720: "f32[][]cuda:0" = _foreach_neg[92] + getitem_1721: "f32[][]cuda:0" = _foreach_neg[93] + getitem_1722: "f32[][]cuda:0" = _foreach_neg[94] + getitem_1723: "f32[][]cuda:0" = _foreach_neg[95] + getitem_1724: "f32[][]cuda:0" = _foreach_neg[96] + getitem_1725: "f32[][]cuda:0" = _foreach_neg[97] + getitem_1726: "f32[][]cuda:0" = _foreach_neg[98] + getitem_1727: "f32[][]cuda:0" = _foreach_neg[99] + getitem_1728: "f32[][]cuda:0" = _foreach_neg[100] + getitem_1729: "f32[][]cuda:0" = _foreach_neg[101] + getitem_1730: "f32[][]cuda:0" = _foreach_neg[102] + getitem_1731: "f32[][]cuda:0" = _foreach_neg[103] + getitem_1732: "f32[][]cuda:0" = _foreach_neg[104] + getitem_1733: "f32[][]cuda:0" = _foreach_neg[105] + getitem_1734: "f32[][]cuda:0" = _foreach_neg[106] + getitem_1735: "f32[][]cuda:0" = _foreach_neg[107] + getitem_1736: "f32[][]cuda:0" = _foreach_neg[108] + getitem_1737: "f32[][]cuda:0" = _foreach_neg[109] + getitem_1738: "f32[][]cuda:0" = _foreach_neg[110] + getitem_1739: "f32[][]cuda:0" = _foreach_neg[111] + getitem_1740: "f32[][]cuda:0" = _foreach_neg[112] + getitem_1741: "f32[][]cuda:0" = _foreach_neg[113] + getitem_1742: "f32[][]cuda:0" = _foreach_neg[114] + getitem_1743: "f32[][]cuda:0" = _foreach_neg[115] + getitem_1744: "f32[][]cuda:0" = _foreach_neg[116] + getitem_1745: "f32[][]cuda:0" = _foreach_neg[117] + getitem_1746: "f32[][]cuda:0" = _foreach_neg[118] + getitem_1747: "f32[][]cuda:0" = _foreach_neg[119] + getitem_1748: "f32[][]cuda:0" = _foreach_neg[120] + getitem_1749: "f32[][]cuda:0" = _foreach_neg[121] + getitem_1750: "f32[][]cuda:0" = _foreach_neg[122] + getitem_1751: "f32[][]cuda:0" = _foreach_neg[123] + getitem_1752: "f32[][]cuda:0" = _foreach_neg[124] + getitem_1753: "f32[][]cuda:0" = _foreach_neg[125] + getitem_1754: "f32[][]cuda:0" = _foreach_neg[126] + getitem_1755: "f32[][]cuda:0" = _foreach_neg[127] + getitem_1756: "f32[][]cuda:0" = _foreach_neg[128] + getitem_1757: "f32[][]cuda:0" = _foreach_neg[129] + getitem_1758: "f32[][]cuda:0" = _foreach_neg[130] + getitem_1759: "f32[][]cuda:0" = _foreach_neg[131] + getitem_1760: "f32[][]cuda:0" = _foreach_neg[132] + getitem_1761: "f32[][]cuda:0" = _foreach_neg[133] + getitem_1762: "f32[][]cuda:0" = _foreach_neg[134] + getitem_1763: "f32[][]cuda:0" = _foreach_neg[135] + getitem_1764: "f32[][]cuda:0" = _foreach_neg[136] + getitem_1765: "f32[][]cuda:0" = _foreach_neg[137] + getitem_1766: "f32[][]cuda:0" = _foreach_neg[138] + getitem_1767: "f32[][]cuda:0" = _foreach_neg[139] + getitem_1768: "f32[][]cuda:0" = _foreach_neg[140] + getitem_1769: "f32[][]cuda:0" = _foreach_neg[141] + getitem_1770: "f32[][]cuda:0" = _foreach_neg[142] + getitem_1771: "f32[][]cuda:0" = _foreach_neg[143] + getitem_1772: "f32[][]cuda:0" = _foreach_neg[144] + getitem_1773: "f32[][]cuda:0" = _foreach_neg[145] + getitem_1774: "f32[][]cuda:0" = _foreach_neg[146] + getitem_1775: "f32[][]cuda:0" = _foreach_neg[147]; _foreach_neg = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, 
getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = 
getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776: "f32[][]cuda:0" = _foreach_div[0] + getitem_1777: "f32[][]cuda:0" = _foreach_div[1] + getitem_1778: "f32[][]cuda:0" = _foreach_div[2] + getitem_1779: "f32[][]cuda:0" = _foreach_div[3] + getitem_1780: "f32[][]cuda:0" = _foreach_div[4] + getitem_1781: "f32[][]cuda:0" = _foreach_div[5] + getitem_1782: "f32[][]cuda:0" = _foreach_div[6] + getitem_1783: "f32[][]cuda:0" = _foreach_div[7] + getitem_1784: "f32[][]cuda:0" = _foreach_div[8] + getitem_1785: "f32[][]cuda:0" = _foreach_div[9] + getitem_1786: "f32[][]cuda:0" = _foreach_div[10] + getitem_1787: "f32[][]cuda:0" = _foreach_div[11] + getitem_1788: "f32[][]cuda:0" = _foreach_div[12] + getitem_1789: "f32[][]cuda:0" = _foreach_div[13] + getitem_1790: "f32[][]cuda:0" = _foreach_div[14] + getitem_1791: "f32[][]cuda:0" = _foreach_div[15] + getitem_1792: "f32[][]cuda:0" = _foreach_div[16] + getitem_1793: "f32[][]cuda:0" = _foreach_div[17] + getitem_1794: "f32[][]cuda:0" = _foreach_div[18] + getitem_1795: "f32[][]cuda:0" = _foreach_div[19] + getitem_1796: "f32[][]cuda:0" = _foreach_div[20] + getitem_1797: "f32[][]cuda:0" = _foreach_div[21] + getitem_1798: "f32[][]cuda:0" = _foreach_div[22] + getitem_1799: "f32[][]cuda:0" = _foreach_div[23] + getitem_1800: "f32[][]cuda:0" = _foreach_div[24] + getitem_1801: "f32[][]cuda:0" = _foreach_div[25] + getitem_1802: "f32[][]cuda:0" = _foreach_div[26] + getitem_1803: "f32[][]cuda:0" = _foreach_div[27] + getitem_1804: "f32[][]cuda:0" = _foreach_div[28] + getitem_1805: "f32[][]cuda:0" = _foreach_div[29] + getitem_1806: "f32[][]cuda:0" = _foreach_div[30] + getitem_1807: "f32[][]cuda:0" = _foreach_div[31] + getitem_1808: "f32[][]cuda:0" = _foreach_div[32] + getitem_1809: "f32[][]cuda:0" = _foreach_div[33] + getitem_1810: "f32[][]cuda:0" = _foreach_div[34] + getitem_1811: "f32[][]cuda:0" = _foreach_div[35] + getitem_1812: "f32[][]cuda:0" = _foreach_div[36] + getitem_1813: "f32[][]cuda:0" = _foreach_div[37] + getitem_1814: "f32[][]cuda:0" = _foreach_div[38] + getitem_1815: "f32[][]cuda:0" = _foreach_div[39] + getitem_1816: "f32[][]cuda:0" = _foreach_div[40] + getitem_1817: "f32[][]cuda:0" = _foreach_div[41] + getitem_1818: "f32[][]cuda:0" = _foreach_div[42] + getitem_1819: "f32[][]cuda:0" = _foreach_div[43] + getitem_1820: "f32[][]cuda:0" = _foreach_div[44] + getitem_1821: "f32[][]cuda:0" = _foreach_div[45] + getitem_1822: "f32[][]cuda:0" = _foreach_div[46] + getitem_1823: "f32[][]cuda:0" = _foreach_div[47] + getitem_1824: "f32[][]cuda:0" = _foreach_div[48] + getitem_1825: "f32[][]cuda:0" = _foreach_div[49] + getitem_1826: "f32[][]cuda:0" = _foreach_div[50] + getitem_1827: "f32[][]cuda:0" = _foreach_div[51] + getitem_1828: "f32[][]cuda:0" = _foreach_div[52] + getitem_1829: "f32[][]cuda:0" = _foreach_div[53] + getitem_1830: "f32[][]cuda:0" = _foreach_div[54] + getitem_1831: "f32[][]cuda:0" = _foreach_div[55] + getitem_1832: "f32[][]cuda:0" = _foreach_div[56] + getitem_1833: "f32[][]cuda:0" = _foreach_div[57] + getitem_1834: "f32[][]cuda:0" = _foreach_div[58] + getitem_1835: "f32[][]cuda:0" = _foreach_div[59] + getitem_1836: "f32[][]cuda:0" = 
_foreach_div[60] + getitem_1837: "f32[][]cuda:0" = _foreach_div[61] + getitem_1838: "f32[][]cuda:0" = _foreach_div[62] + getitem_1839: "f32[][]cuda:0" = _foreach_div[63] + getitem_1840: "f32[][]cuda:0" = _foreach_div[64] + getitem_1841: "f32[][]cuda:0" = _foreach_div[65] + getitem_1842: "f32[][]cuda:0" = _foreach_div[66] + getitem_1843: "f32[][]cuda:0" = _foreach_div[67] + getitem_1844: "f32[][]cuda:0" = _foreach_div[68] + getitem_1845: "f32[][]cuda:0" = _foreach_div[69] + getitem_1846: "f32[][]cuda:0" = _foreach_div[70] + getitem_1847: "f32[][]cuda:0" = _foreach_div[71] + getitem_1848: "f32[][]cuda:0" = _foreach_div[72] + getitem_1849: "f32[][]cuda:0" = _foreach_div[73] + getitem_1850: "f32[][]cuda:0" = _foreach_div[74] + getitem_1851: "f32[][]cuda:0" = _foreach_div[75] + getitem_1852: "f32[][]cuda:0" = _foreach_div[76] + getitem_1853: "f32[][]cuda:0" = _foreach_div[77] + getitem_1854: "f32[][]cuda:0" = _foreach_div[78] + getitem_1855: "f32[][]cuda:0" = _foreach_div[79] + getitem_1856: "f32[][]cuda:0" = _foreach_div[80] + getitem_1857: "f32[][]cuda:0" = _foreach_div[81] + getitem_1858: "f32[][]cuda:0" = _foreach_div[82] + getitem_1859: "f32[][]cuda:0" = _foreach_div[83] + getitem_1860: "f32[][]cuda:0" = _foreach_div[84] + getitem_1861: "f32[][]cuda:0" = _foreach_div[85] + getitem_1862: "f32[][]cuda:0" = _foreach_div[86] + getitem_1863: "f32[][]cuda:0" = _foreach_div[87] + getitem_1864: "f32[][]cuda:0" = _foreach_div[88] + getitem_1865: "f32[][]cuda:0" = _foreach_div[89] + getitem_1866: "f32[][]cuda:0" = _foreach_div[90] + getitem_1867: "f32[][]cuda:0" = _foreach_div[91] + getitem_1868: "f32[][]cuda:0" = _foreach_div[92] + getitem_1869: "f32[][]cuda:0" = _foreach_div[93] + getitem_1870: "f32[][]cuda:0" = _foreach_div[94] + getitem_1871: "f32[][]cuda:0" = _foreach_div[95] + getitem_1872: "f32[][]cuda:0" = _foreach_div[96] + getitem_1873: "f32[][]cuda:0" = _foreach_div[97] + getitem_1874: "f32[][]cuda:0" = _foreach_div[98] + getitem_1875: "f32[][]cuda:0" = _foreach_div[99] + getitem_1876: "f32[][]cuda:0" = _foreach_div[100] + getitem_1877: "f32[][]cuda:0" = _foreach_div[101] + getitem_1878: "f32[][]cuda:0" = _foreach_div[102] + getitem_1879: "f32[][]cuda:0" = _foreach_div[103] + getitem_1880: "f32[][]cuda:0" = _foreach_div[104] + getitem_1881: "f32[][]cuda:0" = _foreach_div[105] + getitem_1882: "f32[][]cuda:0" = _foreach_div[106] + getitem_1883: "f32[][]cuda:0" = _foreach_div[107] + getitem_1884: "f32[][]cuda:0" = _foreach_div[108] + getitem_1885: "f32[][]cuda:0" = _foreach_div[109] + getitem_1886: "f32[][]cuda:0" = _foreach_div[110] + getitem_1887: "f32[][]cuda:0" = _foreach_div[111] + getitem_1888: "f32[][]cuda:0" = _foreach_div[112] + getitem_1889: "f32[][]cuda:0" = _foreach_div[113] + getitem_1890: "f32[][]cuda:0" = _foreach_div[114] + getitem_1891: "f32[][]cuda:0" = _foreach_div[115] + getitem_1892: "f32[][]cuda:0" = _foreach_div[116] + getitem_1893: "f32[][]cuda:0" = _foreach_div[117] + getitem_1894: "f32[][]cuda:0" = _foreach_div[118] + getitem_1895: "f32[][]cuda:0" = _foreach_div[119] + getitem_1896: "f32[][]cuda:0" = _foreach_div[120] + getitem_1897: "f32[][]cuda:0" = _foreach_div[121] + getitem_1898: "f32[][]cuda:0" = _foreach_div[122] + getitem_1899: "f32[][]cuda:0" = _foreach_div[123] + getitem_1900: "f32[][]cuda:0" = _foreach_div[124] + getitem_1901: "f32[][]cuda:0" = _foreach_div[125] + getitem_1902: "f32[][]cuda:0" = _foreach_div[126] + getitem_1903: "f32[][]cuda:0" = _foreach_div[127] + getitem_1904: "f32[][]cuda:0" = _foreach_div[128] + getitem_1905: "f32[][]cuda:0" = 
_foreach_div[129] + getitem_1906: "f32[][]cuda:0" = _foreach_div[130] + getitem_1907: "f32[][]cuda:0" = _foreach_div[131] + getitem_1908: "f32[][]cuda:0" = _foreach_div[132] + getitem_1909: "f32[][]cuda:0" = _foreach_div[133] + getitem_1910: "f32[][]cuda:0" = _foreach_div[134] + getitem_1911: "f32[][]cuda:0" = _foreach_div[135] + getitem_1912: "f32[][]cuda:0" = _foreach_div[136] + getitem_1913: "f32[][]cuda:0" = _foreach_div[137] + getitem_1914: "f32[][]cuda:0" = _foreach_div[138] + getitem_1915: "f32[][]cuda:0" = _foreach_div[139] + getitem_1916: "f32[][]cuda:0" = _foreach_div[140] + getitem_1917: "f32[][]cuda:0" = _foreach_div[141] + getitem_1918: "f32[][]cuda:0" = _foreach_div[142] + getitem_1919: "f32[][]cuda:0" = _foreach_div[143] + getitem_1920: "f32[][]cuda:0" = _foreach_div[144] + getitem_1921: "f32[][]cuda:0" = _foreach_div[145] + getitem_1922: "f32[][]cuda:0" = _foreach_div[146] + getitem_1923: "f32[][]cuda:0" = _foreach_div[147]; _foreach_div = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = 
getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924: "f32[][]cuda:0" = _foreach_reciprocal[0] + getitem_1925: "f32[][]cuda:0" = _foreach_reciprocal[1] + getitem_1926: "f32[][]cuda:0" = _foreach_reciprocal[2] + getitem_1927: "f32[][]cuda:0" = _foreach_reciprocal[3] + getitem_1928: "f32[][]cuda:0" = _foreach_reciprocal[4] + getitem_1929: "f32[][]cuda:0" = _foreach_reciprocal[5] + getitem_1930: "f32[][]cuda:0" = _foreach_reciprocal[6] + getitem_1931: "f32[][]cuda:0" = _foreach_reciprocal[7] + getitem_1932: "f32[][]cuda:0" = _foreach_reciprocal[8] + getitem_1933: "f32[][]cuda:0" = _foreach_reciprocal[9] + getitem_1934: "f32[][]cuda:0" = _foreach_reciprocal[10] + getitem_1935: "f32[][]cuda:0" = _foreach_reciprocal[11] + getitem_1936: "f32[][]cuda:0" = _foreach_reciprocal[12] + getitem_1937: "f32[][]cuda:0" = _foreach_reciprocal[13] + getitem_1938: "f32[][]cuda:0" = _foreach_reciprocal[14] + getitem_1939: "f32[][]cuda:0" = _foreach_reciprocal[15] + getitem_1940: "f32[][]cuda:0" = _foreach_reciprocal[16] + getitem_1941: "f32[][]cuda:0" = _foreach_reciprocal[17] + getitem_1942: "f32[][]cuda:0" = _foreach_reciprocal[18] + getitem_1943: "f32[][]cuda:0" = _foreach_reciprocal[19] + getitem_1944: "f32[][]cuda:0" = _foreach_reciprocal[20] + getitem_1945: "f32[][]cuda:0" = _foreach_reciprocal[21] + getitem_1946: "f32[][]cuda:0" = _foreach_reciprocal[22] + getitem_1947: "f32[][]cuda:0" = _foreach_reciprocal[23] + getitem_1948: "f32[][]cuda:0" = _foreach_reciprocal[24] + getitem_1949: "f32[][]cuda:0" = _foreach_reciprocal[25] + getitem_1950: "f32[][]cuda:0" = _foreach_reciprocal[26] + getitem_1951: "f32[][]cuda:0" = _foreach_reciprocal[27] + getitem_1952: 
"f32[][]cuda:0" = _foreach_reciprocal[28] + getitem_1953: "f32[][]cuda:0" = _foreach_reciprocal[29] + getitem_1954: "f32[][]cuda:0" = _foreach_reciprocal[30] + getitem_1955: "f32[][]cuda:0" = _foreach_reciprocal[31] + getitem_1956: "f32[][]cuda:0" = _foreach_reciprocal[32] + getitem_1957: "f32[][]cuda:0" = _foreach_reciprocal[33] + getitem_1958: "f32[][]cuda:0" = _foreach_reciprocal[34] + getitem_1959: "f32[][]cuda:0" = _foreach_reciprocal[35] + getitem_1960: "f32[][]cuda:0" = _foreach_reciprocal[36] + getitem_1961: "f32[][]cuda:0" = _foreach_reciprocal[37] + getitem_1962: "f32[][]cuda:0" = _foreach_reciprocal[38] + getitem_1963: "f32[][]cuda:0" = _foreach_reciprocal[39] + getitem_1964: "f32[][]cuda:0" = _foreach_reciprocal[40] + getitem_1965: "f32[][]cuda:0" = _foreach_reciprocal[41] + getitem_1966: "f32[][]cuda:0" = _foreach_reciprocal[42] + getitem_1967: "f32[][]cuda:0" = _foreach_reciprocal[43] + getitem_1968: "f32[][]cuda:0" = _foreach_reciprocal[44] + getitem_1969: "f32[][]cuda:0" = _foreach_reciprocal[45] + getitem_1970: "f32[][]cuda:0" = _foreach_reciprocal[46] + getitem_1971: "f32[][]cuda:0" = _foreach_reciprocal[47] + getitem_1972: "f32[][]cuda:0" = _foreach_reciprocal[48] + getitem_1973: "f32[][]cuda:0" = _foreach_reciprocal[49] + getitem_1974: "f32[][]cuda:0" = _foreach_reciprocal[50] + getitem_1975: "f32[][]cuda:0" = _foreach_reciprocal[51] + getitem_1976: "f32[][]cuda:0" = _foreach_reciprocal[52] + getitem_1977: "f32[][]cuda:0" = _foreach_reciprocal[53] + getitem_1978: "f32[][]cuda:0" = _foreach_reciprocal[54] + getitem_1979: "f32[][]cuda:0" = _foreach_reciprocal[55] + getitem_1980: "f32[][]cuda:0" = _foreach_reciprocal[56] + getitem_1981: "f32[][]cuda:0" = _foreach_reciprocal[57] + getitem_1982: "f32[][]cuda:0" = _foreach_reciprocal[58] + getitem_1983: "f32[][]cuda:0" = _foreach_reciprocal[59] + getitem_1984: "f32[][]cuda:0" = _foreach_reciprocal[60] + getitem_1985: "f32[][]cuda:0" = _foreach_reciprocal[61] + getitem_1986: "f32[][]cuda:0" = _foreach_reciprocal[62] + getitem_1987: "f32[][]cuda:0" = _foreach_reciprocal[63] + getitem_1988: "f32[][]cuda:0" = _foreach_reciprocal[64] + getitem_1989: "f32[][]cuda:0" = _foreach_reciprocal[65] + getitem_1990: "f32[][]cuda:0" = _foreach_reciprocal[66] + getitem_1991: "f32[][]cuda:0" = _foreach_reciprocal[67] + getitem_1992: "f32[][]cuda:0" = _foreach_reciprocal[68] + getitem_1993: "f32[][]cuda:0" = _foreach_reciprocal[69] + getitem_1994: "f32[][]cuda:0" = _foreach_reciprocal[70] + getitem_1995: "f32[][]cuda:0" = _foreach_reciprocal[71] + getitem_1996: "f32[][]cuda:0" = _foreach_reciprocal[72] + getitem_1997: "f32[][]cuda:0" = _foreach_reciprocal[73] + getitem_1998: "f32[][]cuda:0" = _foreach_reciprocal[74] + getitem_1999: "f32[][]cuda:0" = _foreach_reciprocal[75] + getitem_2000: "f32[][]cuda:0" = _foreach_reciprocal[76] + getitem_2001: "f32[][]cuda:0" = _foreach_reciprocal[77] + getitem_2002: "f32[][]cuda:0" = _foreach_reciprocal[78] + getitem_2003: "f32[][]cuda:0" = _foreach_reciprocal[79] + getitem_2004: "f32[][]cuda:0" = _foreach_reciprocal[80] + getitem_2005: "f32[][]cuda:0" = _foreach_reciprocal[81] + getitem_2006: "f32[][]cuda:0" = _foreach_reciprocal[82] + getitem_2007: "f32[][]cuda:0" = _foreach_reciprocal[83] + getitem_2008: "f32[][]cuda:0" = _foreach_reciprocal[84] + getitem_2009: "f32[][]cuda:0" = _foreach_reciprocal[85] + getitem_2010: "f32[][]cuda:0" = _foreach_reciprocal[86] + getitem_2011: "f32[][]cuda:0" = _foreach_reciprocal[87] + getitem_2012: "f32[][]cuda:0" = _foreach_reciprocal[88] + getitem_2013: "f32[][]cuda:0" 
= _foreach_reciprocal[89] + getitem_2014: "f32[][]cuda:0" = _foreach_reciprocal[90] + getitem_2015: "f32[][]cuda:0" = _foreach_reciprocal[91] + getitem_2016: "f32[][]cuda:0" = _foreach_reciprocal[92] + getitem_2017: "f32[][]cuda:0" = _foreach_reciprocal[93] + getitem_2018: "f32[][]cuda:0" = _foreach_reciprocal[94] + getitem_2019: "f32[][]cuda:0" = _foreach_reciprocal[95] + getitem_2020: "f32[][]cuda:0" = _foreach_reciprocal[96] + getitem_2021: "f32[][]cuda:0" = _foreach_reciprocal[97] + getitem_2022: "f32[][]cuda:0" = _foreach_reciprocal[98] + getitem_2023: "f32[][]cuda:0" = _foreach_reciprocal[99] + getitem_2024: "f32[][]cuda:0" = _foreach_reciprocal[100] + getitem_2025: "f32[][]cuda:0" = _foreach_reciprocal[101] + getitem_2026: "f32[][]cuda:0" = _foreach_reciprocal[102] + getitem_2027: "f32[][]cuda:0" = _foreach_reciprocal[103] + getitem_2028: "f32[][]cuda:0" = _foreach_reciprocal[104] + getitem_2029: "f32[][]cuda:0" = _foreach_reciprocal[105] + getitem_2030: "f32[][]cuda:0" = _foreach_reciprocal[106] + getitem_2031: "f32[][]cuda:0" = _foreach_reciprocal[107] + getitem_2032: "f32[][]cuda:0" = _foreach_reciprocal[108] + getitem_2033: "f32[][]cuda:0" = _foreach_reciprocal[109] + getitem_2034: "f32[][]cuda:0" = _foreach_reciprocal[110] + getitem_2035: "f32[][]cuda:0" = _foreach_reciprocal[111] + getitem_2036: "f32[][]cuda:0" = _foreach_reciprocal[112] + getitem_2037: "f32[][]cuda:0" = _foreach_reciprocal[113] + getitem_2038: "f32[][]cuda:0" = _foreach_reciprocal[114] + getitem_2039: "f32[][]cuda:0" = _foreach_reciprocal[115] + getitem_2040: "f32[][]cuda:0" = _foreach_reciprocal[116] + getitem_2041: "f32[][]cuda:0" = _foreach_reciprocal[117] + getitem_2042: "f32[][]cuda:0" = _foreach_reciprocal[118] + getitem_2043: "f32[][]cuda:0" = _foreach_reciprocal[119] + getitem_2044: "f32[][]cuda:0" = _foreach_reciprocal[120] + getitem_2045: "f32[][]cuda:0" = _foreach_reciprocal[121] + getitem_2046: "f32[][]cuda:0" = _foreach_reciprocal[122] + getitem_2047: "f32[][]cuda:0" = _foreach_reciprocal[123] + getitem_2048: "f32[][]cuda:0" = _foreach_reciprocal[124] + getitem_2049: "f32[][]cuda:0" = _foreach_reciprocal[125] + getitem_2050: "f32[][]cuda:0" = _foreach_reciprocal[126] + getitem_2051: "f32[][]cuda:0" = _foreach_reciprocal[127] + getitem_2052: "f32[][]cuda:0" = _foreach_reciprocal[128] + getitem_2053: "f32[][]cuda:0" = _foreach_reciprocal[129] + getitem_2054: "f32[][]cuda:0" = _foreach_reciprocal[130] + getitem_2055: "f32[][]cuda:0" = _foreach_reciprocal[131] + getitem_2056: "f32[][]cuda:0" = _foreach_reciprocal[132] + getitem_2057: "f32[][]cuda:0" = _foreach_reciprocal[133] + getitem_2058: "f32[][]cuda:0" = _foreach_reciprocal[134] + getitem_2059: "f32[][]cuda:0" = _foreach_reciprocal[135] + getitem_2060: "f32[][]cuda:0" = _foreach_reciprocal[136] + getitem_2061: "f32[][]cuda:0" = _foreach_reciprocal[137] + getitem_2062: "f32[][]cuda:0" = _foreach_reciprocal[138] + getitem_2063: "f32[][]cuda:0" = _foreach_reciprocal[139] + getitem_2064: "f32[][]cuda:0" = _foreach_reciprocal[140] + getitem_2065: "f32[][]cuda:0" = _foreach_reciprocal[141] + getitem_2066: "f32[][]cuda:0" = _foreach_reciprocal[142] + getitem_2067: "f32[][]cuda:0" = _foreach_reciprocal[143] + getitem_2068: "f32[][]cuda:0" = _foreach_reciprocal[144] + getitem_2069: "f32[][]cuda:0" = _foreach_reciprocal[145] + getitem_2070: "f32[][]cuda:0" = _foreach_reciprocal[146] + getitem_2071: "f32[][]cuda:0" = _foreach_reciprocal[147]; _foreach_reciprocal = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, 
code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = 
getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072: "f32[][]cuda:0" = _foreach_sqrt[0] + getitem_2073: "f32[][]cuda:0" = _foreach_sqrt[1] + getitem_2074: "f32[][]cuda:0" = _foreach_sqrt[2] + getitem_2075: "f32[][]cuda:0" = _foreach_sqrt[3] + getitem_2076: "f32[][]cuda:0" = _foreach_sqrt[4] + getitem_2077: "f32[][]cuda:0" = _foreach_sqrt[5] + getitem_2078: "f32[][]cuda:0" = _foreach_sqrt[6] + getitem_2079: "f32[][]cuda:0" = _foreach_sqrt[7] + getitem_2080: "f32[][]cuda:0" = _foreach_sqrt[8] + getitem_2081: "f32[][]cuda:0" = _foreach_sqrt[9] + getitem_2082: "f32[][]cuda:0" = _foreach_sqrt[10] + getitem_2083: "f32[][]cuda:0" = _foreach_sqrt[11] + getitem_2084: "f32[][]cuda:0" = _foreach_sqrt[12] + getitem_2085: "f32[][]cuda:0" = _foreach_sqrt[13] + getitem_2086: "f32[][]cuda:0" = _foreach_sqrt[14] + getitem_2087: "f32[][]cuda:0" = _foreach_sqrt[15] + getitem_2088: "f32[][]cuda:0" = _foreach_sqrt[16] + getitem_2089: "f32[][]cuda:0" = _foreach_sqrt[17] + getitem_2090: "f32[][]cuda:0" = _foreach_sqrt[18] + getitem_2091: "f32[][]cuda:0" = _foreach_sqrt[19] + getitem_2092: "f32[][]cuda:0" = _foreach_sqrt[20] + getitem_2093: "f32[][]cuda:0" = _foreach_sqrt[21] + getitem_2094: "f32[][]cuda:0" = _foreach_sqrt[22] + getitem_2095: "f32[][]cuda:0" = _foreach_sqrt[23] + getitem_2096: "f32[][]cuda:0" = _foreach_sqrt[24] + getitem_2097: "f32[][]cuda:0" = _foreach_sqrt[25] + getitem_2098: "f32[][]cuda:0" = _foreach_sqrt[26] + getitem_2099: "f32[][]cuda:0" = _foreach_sqrt[27] + getitem_2100: "f32[][]cuda:0" = _foreach_sqrt[28] + getitem_2101: "f32[][]cuda:0" = _foreach_sqrt[29] + getitem_2102: "f32[][]cuda:0" = _foreach_sqrt[30] + getitem_2103: "f32[][]cuda:0" = _foreach_sqrt[31] + getitem_2104: "f32[][]cuda:0" = _foreach_sqrt[32] + getitem_2105: "f32[][]cuda:0" = _foreach_sqrt[33] + getitem_2106: "f32[][]cuda:0" = _foreach_sqrt[34] + getitem_2107: "f32[][]cuda:0" = _foreach_sqrt[35] + getitem_2108: "f32[][]cuda:0" = _foreach_sqrt[36] + getitem_2109: "f32[][]cuda:0" = _foreach_sqrt[37] + getitem_2110: "f32[][]cuda:0" = _foreach_sqrt[38] + getitem_2111: "f32[][]cuda:0" = _foreach_sqrt[39] + getitem_2112: "f32[][]cuda:0" = _foreach_sqrt[40] + getitem_2113: "f32[][]cuda:0" = _foreach_sqrt[41] + getitem_2114: "f32[][]cuda:0" = _foreach_sqrt[42] + getitem_2115: "f32[][]cuda:0" = _foreach_sqrt[43] + getitem_2116: "f32[][]cuda:0" = _foreach_sqrt[44] + getitem_2117: "f32[][]cuda:0" = _foreach_sqrt[45] + getitem_2118: "f32[][]cuda:0" = _foreach_sqrt[46] + getitem_2119: "f32[][]cuda:0" = _foreach_sqrt[47] + getitem_2120: "f32[][]cuda:0" = _foreach_sqrt[48] + getitem_2121: "f32[][]cuda:0" = _foreach_sqrt[49] + getitem_2122: "f32[][]cuda:0" = _foreach_sqrt[50] + getitem_2123: "f32[][]cuda:0" = _foreach_sqrt[51] + getitem_2124: 
"f32[][]cuda:0" = _foreach_sqrt[52] + getitem_2125: "f32[][]cuda:0" = _foreach_sqrt[53] + getitem_2126: "f32[][]cuda:0" = _foreach_sqrt[54] + getitem_2127: "f32[][]cuda:0" = _foreach_sqrt[55] + getitem_2128: "f32[][]cuda:0" = _foreach_sqrt[56] + getitem_2129: "f32[][]cuda:0" = _foreach_sqrt[57] + getitem_2130: "f32[][]cuda:0" = _foreach_sqrt[58] + getitem_2131: "f32[][]cuda:0" = _foreach_sqrt[59] + getitem_2132: "f32[][]cuda:0" = _foreach_sqrt[60] + getitem_2133: "f32[][]cuda:0" = _foreach_sqrt[61] + getitem_2134: "f32[][]cuda:0" = _foreach_sqrt[62] + getitem_2135: "f32[][]cuda:0" = _foreach_sqrt[63] + getitem_2136: "f32[][]cuda:0" = _foreach_sqrt[64] + getitem_2137: "f32[][]cuda:0" = _foreach_sqrt[65] + getitem_2138: "f32[][]cuda:0" = _foreach_sqrt[66] + getitem_2139: "f32[][]cuda:0" = _foreach_sqrt[67] + getitem_2140: "f32[][]cuda:0" = _foreach_sqrt[68] + getitem_2141: "f32[][]cuda:0" = _foreach_sqrt[69] + getitem_2142: "f32[][]cuda:0" = _foreach_sqrt[70] + getitem_2143: "f32[][]cuda:0" = _foreach_sqrt[71] + getitem_2144: "f32[][]cuda:0" = _foreach_sqrt[72] + getitem_2145: "f32[][]cuda:0" = _foreach_sqrt[73] + getitem_2146: "f32[][]cuda:0" = _foreach_sqrt[74] + getitem_2147: "f32[][]cuda:0" = _foreach_sqrt[75] + getitem_2148: "f32[][]cuda:0" = _foreach_sqrt[76] + getitem_2149: "f32[][]cuda:0" = _foreach_sqrt[77] + getitem_2150: "f32[][]cuda:0" = _foreach_sqrt[78] + getitem_2151: "f32[][]cuda:0" = _foreach_sqrt[79] + getitem_2152: "f32[][]cuda:0" = _foreach_sqrt[80] + getitem_2153: "f32[][]cuda:0" = _foreach_sqrt[81] + getitem_2154: "f32[][]cuda:0" = _foreach_sqrt[82] + getitem_2155: "f32[][]cuda:0" = _foreach_sqrt[83] + getitem_2156: "f32[][]cuda:0" = _foreach_sqrt[84] + getitem_2157: "f32[][]cuda:0" = _foreach_sqrt[85] + getitem_2158: "f32[][]cuda:0" = _foreach_sqrt[86] + getitem_2159: "f32[][]cuda:0" = _foreach_sqrt[87] + getitem_2160: "f32[][]cuda:0" = _foreach_sqrt[88] + getitem_2161: "f32[][]cuda:0" = _foreach_sqrt[89] + getitem_2162: "f32[][]cuda:0" = _foreach_sqrt[90] + getitem_2163: "f32[][]cuda:0" = _foreach_sqrt[91] + getitem_2164: "f32[][]cuda:0" = _foreach_sqrt[92] + getitem_2165: "f32[][]cuda:0" = _foreach_sqrt[93] + getitem_2166: "f32[][]cuda:0" = _foreach_sqrt[94] + getitem_2167: "f32[][]cuda:0" = _foreach_sqrt[95] + getitem_2168: "f32[][]cuda:0" = _foreach_sqrt[96] + getitem_2169: "f32[][]cuda:0" = _foreach_sqrt[97] + getitem_2170: "f32[][]cuda:0" = _foreach_sqrt[98] + getitem_2171: "f32[][]cuda:0" = _foreach_sqrt[99] + getitem_2172: "f32[][]cuda:0" = _foreach_sqrt[100] + getitem_2173: "f32[][]cuda:0" = _foreach_sqrt[101] + getitem_2174: "f32[][]cuda:0" = _foreach_sqrt[102] + getitem_2175: "f32[][]cuda:0" = _foreach_sqrt[103] + getitem_2176: "f32[][]cuda:0" = _foreach_sqrt[104] + getitem_2177: "f32[][]cuda:0" = _foreach_sqrt[105] + getitem_2178: "f32[][]cuda:0" = _foreach_sqrt[106] + getitem_2179: "f32[][]cuda:0" = _foreach_sqrt[107] + getitem_2180: "f32[][]cuda:0" = _foreach_sqrt[108] + getitem_2181: "f32[][]cuda:0" = _foreach_sqrt[109] + getitem_2182: "f32[][]cuda:0" = _foreach_sqrt[110] + getitem_2183: "f32[][]cuda:0" = _foreach_sqrt[111] + getitem_2184: "f32[][]cuda:0" = _foreach_sqrt[112] + getitem_2185: "f32[][]cuda:0" = _foreach_sqrt[113] + getitem_2186: "f32[][]cuda:0" = _foreach_sqrt[114] + getitem_2187: "f32[][]cuda:0" = _foreach_sqrt[115] + getitem_2188: "f32[][]cuda:0" = _foreach_sqrt[116] + getitem_2189: "f32[][]cuda:0" = _foreach_sqrt[117] + getitem_2190: "f32[][]cuda:0" = _foreach_sqrt[118] + getitem_2191: "f32[][]cuda:0" = _foreach_sqrt[119] + 
getitem_2192: "f32[][]cuda:0" = _foreach_sqrt[120] + getitem_2193: "f32[][]cuda:0" = _foreach_sqrt[121] + getitem_2194: "f32[][]cuda:0" = _foreach_sqrt[122] + getitem_2195: "f32[][]cuda:0" = _foreach_sqrt[123] + getitem_2196: "f32[][]cuda:0" = _foreach_sqrt[124] + getitem_2197: "f32[][]cuda:0" = _foreach_sqrt[125] + getitem_2198: "f32[][]cuda:0" = _foreach_sqrt[126] + getitem_2199: "f32[][]cuda:0" = _foreach_sqrt[127] + getitem_2200: "f32[][]cuda:0" = _foreach_sqrt[128] + getitem_2201: "f32[][]cuda:0" = _foreach_sqrt[129] + getitem_2202: "f32[][]cuda:0" = _foreach_sqrt[130] + getitem_2203: "f32[][]cuda:0" = _foreach_sqrt[131] + getitem_2204: "f32[][]cuda:0" = _foreach_sqrt[132] + getitem_2205: "f32[][]cuda:0" = _foreach_sqrt[133] + getitem_2206: "f32[][]cuda:0" = _foreach_sqrt[134] + getitem_2207: "f32[][]cuda:0" = _foreach_sqrt[135] + getitem_2208: "f32[][]cuda:0" = _foreach_sqrt[136] + getitem_2209: "f32[][]cuda:0" = _foreach_sqrt[137] + getitem_2210: "f32[][]cuda:0" = _foreach_sqrt[138] + getitem_2211: "f32[][]cuda:0" = _foreach_sqrt[139] + getitem_2212: "f32[][]cuda:0" = _foreach_sqrt[140] + getitem_2213: "f32[][]cuda:0" = _foreach_sqrt[141] + getitem_2214: "f32[][]cuda:0" = _foreach_sqrt[142] + getitem_2215: "f32[][]cuda:0" = _foreach_sqrt[143] + getitem_2216: "f32[][]cuda:0" = _foreach_sqrt[144] + getitem_2217: "f32[][]cuda:0" = _foreach_sqrt[145] + getitem_2218: "f32[][]cuda:0" = _foreach_sqrt[146] + getitem_2219: "f32[][]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, 
getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt_1[0] + getitem_2221: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt_1[1] + getitem_2222: "f32[768][1]cuda:0" = _foreach_sqrt_1[2] + getitem_2223: "f32[768][1]cuda:0" = _foreach_sqrt_1[3] + getitem_2224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[4] + getitem_2225: "f32[2304][1]cuda:0" = _foreach_sqrt_1[5] + getitem_2226: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[6] + getitem_2227: "f32[768][1]cuda:0" = _foreach_sqrt_1[7] + getitem_2228: "f32[768][1]cuda:0" = _foreach_sqrt_1[8] + getitem_2229: "f32[768][1]cuda:0" = _foreach_sqrt_1[9] + getitem_2230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[10] + getitem_2231: "f32[3072][1]cuda:0" = _foreach_sqrt_1[11] + getitem_2232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[12] + getitem_2233: "f32[768][1]cuda:0" = _foreach_sqrt_1[13] + getitem_2234: "f32[768][1]cuda:0" = _foreach_sqrt_1[14] + getitem_2235: "f32[768][1]cuda:0" = _foreach_sqrt_1[15] + getitem_2236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[16] + getitem_2237: "f32[2304][1]cuda:0" = _foreach_sqrt_1[17] + getitem_2238: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[18] + getitem_2239: "f32[768][1]cuda:0" = _foreach_sqrt_1[19] + getitem_2240: "f32[768][1]cuda:0" = _foreach_sqrt_1[20] + getitem_2241: "f32[768][1]cuda:0" = _foreach_sqrt_1[21] + getitem_2242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[22] + getitem_2243: "f32[3072][1]cuda:0" = _foreach_sqrt_1[23] + getitem_2244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[24] + getitem_2245: "f32[768][1]cuda:0" = _foreach_sqrt_1[25] + getitem_2246: "f32[768][1]cuda:0" = _foreach_sqrt_1[26] + getitem_2247: "f32[768][1]cuda:0" = _foreach_sqrt_1[27] + getitem_2248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[28] + getitem_2249: "f32[2304][1]cuda:0" = _foreach_sqrt_1[29] + getitem_2250: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[30] + getitem_2251: "f32[768][1]cuda:0" = _foreach_sqrt_1[31] + getitem_2252: "f32[768][1]cuda:0" = _foreach_sqrt_1[32] + getitem_2253: "f32[768][1]cuda:0" = _foreach_sqrt_1[33] + getitem_2254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[34] + getitem_2255: "f32[3072][1]cuda:0" = _foreach_sqrt_1[35] + getitem_2256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[36] + getitem_2257: "f32[768][1]cuda:0" = _foreach_sqrt_1[37] + getitem_2258: "f32[768][1]cuda:0" = _foreach_sqrt_1[38] + getitem_2259: "f32[768][1]cuda:0" = _foreach_sqrt_1[39] + getitem_2260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[40] + getitem_2261: "f32[2304][1]cuda:0" = _foreach_sqrt_1[41] + getitem_2262: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[42] + getitem_2263: "f32[768][1]cuda:0" = _foreach_sqrt_1[43] + getitem_2264: "f32[768][1]cuda:0" = _foreach_sqrt_1[44] + getitem_2265: "f32[768][1]cuda:0" = _foreach_sqrt_1[45] + getitem_2266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[46] + getitem_2267: "f32[3072][1]cuda:0" = _foreach_sqrt_1[47] + getitem_2268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[48] + getitem_2269: "f32[768][1]cuda:0" = _foreach_sqrt_1[49] + getitem_2270: "f32[768][1]cuda:0" = _foreach_sqrt_1[50] + getitem_2271: "f32[768][1]cuda:0" = _foreach_sqrt_1[51] + getitem_2272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[52] + getitem_2273: "f32[2304][1]cuda:0" = _foreach_sqrt_1[53] + getitem_2274: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[54] + getitem_2275: 
"f32[768][1]cuda:0" = _foreach_sqrt_1[55] + getitem_2276: "f32[768][1]cuda:0" = _foreach_sqrt_1[56] + getitem_2277: "f32[768][1]cuda:0" = _foreach_sqrt_1[57] + getitem_2278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[58] + getitem_2279: "f32[3072][1]cuda:0" = _foreach_sqrt_1[59] + getitem_2280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[60] + getitem_2281: "f32[768][1]cuda:0" = _foreach_sqrt_1[61] + getitem_2282: "f32[768][1]cuda:0" = _foreach_sqrt_1[62] + getitem_2283: "f32[768][1]cuda:0" = _foreach_sqrt_1[63] + getitem_2284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[64] + getitem_2285: "f32[2304][1]cuda:0" = _foreach_sqrt_1[65] + getitem_2286: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[66] + getitem_2287: "f32[768][1]cuda:0" = _foreach_sqrt_1[67] + getitem_2288: "f32[768][1]cuda:0" = _foreach_sqrt_1[68] + getitem_2289: "f32[768][1]cuda:0" = _foreach_sqrt_1[69] + getitem_2290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[70] + getitem_2291: "f32[3072][1]cuda:0" = _foreach_sqrt_1[71] + getitem_2292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[72] + getitem_2293: "f32[768][1]cuda:0" = _foreach_sqrt_1[73] + getitem_2294: "f32[768][1]cuda:0" = _foreach_sqrt_1[74] + getitem_2295: "f32[768][1]cuda:0" = _foreach_sqrt_1[75] + getitem_2296: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[76] + getitem_2297: "f32[2304][1]cuda:0" = _foreach_sqrt_1[77] + getitem_2298: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[78] + getitem_2299: "f32[768][1]cuda:0" = _foreach_sqrt_1[79] + getitem_2300: "f32[768][1]cuda:0" = _foreach_sqrt_1[80] + getitem_2301: "f32[768][1]cuda:0" = _foreach_sqrt_1[81] + getitem_2302: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[82] + getitem_2303: "f32[3072][1]cuda:0" = _foreach_sqrt_1[83] + getitem_2304: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[84] + getitem_2305: "f32[768][1]cuda:0" = _foreach_sqrt_1[85] + getitem_2306: "f32[768][1]cuda:0" = _foreach_sqrt_1[86] + getitem_2307: "f32[768][1]cuda:0" = _foreach_sqrt_1[87] + getitem_2308: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[88] + getitem_2309: "f32[2304][1]cuda:0" = _foreach_sqrt_1[89] + getitem_2310: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[90] + getitem_2311: "f32[768][1]cuda:0" = _foreach_sqrt_1[91] + getitem_2312: "f32[768][1]cuda:0" = _foreach_sqrt_1[92] + getitem_2313: "f32[768][1]cuda:0" = _foreach_sqrt_1[93] + getitem_2314: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[94] + getitem_2315: "f32[3072][1]cuda:0" = _foreach_sqrt_1[95] + getitem_2316: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[96] + getitem_2317: "f32[768][1]cuda:0" = _foreach_sqrt_1[97] + getitem_2318: "f32[768][1]cuda:0" = _foreach_sqrt_1[98] + getitem_2319: "f32[768][1]cuda:0" = _foreach_sqrt_1[99] + getitem_2320: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[100] + getitem_2321: "f32[2304][1]cuda:0" = _foreach_sqrt_1[101] + getitem_2322: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[102] + getitem_2323: "f32[768][1]cuda:0" = _foreach_sqrt_1[103] + getitem_2324: "f32[768][1]cuda:0" = _foreach_sqrt_1[104] + getitem_2325: "f32[768][1]cuda:0" = _foreach_sqrt_1[105] + getitem_2326: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[106] + getitem_2327: "f32[3072][1]cuda:0" = _foreach_sqrt_1[107] + getitem_2328: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[108] + getitem_2329: "f32[768][1]cuda:0" = _foreach_sqrt_1[109] + getitem_2330: "f32[768][1]cuda:0" = _foreach_sqrt_1[110] + getitem_2331: "f32[768][1]cuda:0" = _foreach_sqrt_1[111] + getitem_2332: "f32[2304, 768][768, 
1]cuda:0" = _foreach_sqrt_1[112] + getitem_2333: "f32[2304][1]cuda:0" = _foreach_sqrt_1[113] + getitem_2334: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[114] + getitem_2335: "f32[768][1]cuda:0" = _foreach_sqrt_1[115] + getitem_2336: "f32[768][1]cuda:0" = _foreach_sqrt_1[116] + getitem_2337: "f32[768][1]cuda:0" = _foreach_sqrt_1[117] + getitem_2338: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[118] + getitem_2339: "f32[3072][1]cuda:0" = _foreach_sqrt_1[119] + getitem_2340: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[120] + getitem_2341: "f32[768][1]cuda:0" = _foreach_sqrt_1[121] + getitem_2342: "f32[768][1]cuda:0" = _foreach_sqrt_1[122] + getitem_2343: "f32[768][1]cuda:0" = _foreach_sqrt_1[123] + getitem_2344: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[124] + getitem_2345: "f32[2304][1]cuda:0" = _foreach_sqrt_1[125] + getitem_2346: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[126] + getitem_2347: "f32[768][1]cuda:0" = _foreach_sqrt_1[127] + getitem_2348: "f32[768][1]cuda:0" = _foreach_sqrt_1[128] + getitem_2349: "f32[768][1]cuda:0" = _foreach_sqrt_1[129] + getitem_2350: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[130] + getitem_2351: "f32[3072][1]cuda:0" = _foreach_sqrt_1[131] + getitem_2352: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[132] + getitem_2353: "f32[768][1]cuda:0" = _foreach_sqrt_1[133] + getitem_2354: "f32[768][1]cuda:0" = _foreach_sqrt_1[134] + getitem_2355: "f32[768][1]cuda:0" = _foreach_sqrt_1[135] + getitem_2356: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[136] + getitem_2357: "f32[2304][1]cuda:0" = _foreach_sqrt_1[137] + getitem_2358: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[138] + getitem_2359: "f32[768][1]cuda:0" = _foreach_sqrt_1[139] + getitem_2360: "f32[768][1]cuda:0" = _foreach_sqrt_1[140] + getitem_2361: "f32[768][1]cuda:0" = _foreach_sqrt_1[141] + getitem_2362: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[142] + getitem_2363: "f32[3072][1]cuda:0" = _foreach_sqrt_1[143] + getitem_2364: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[144] + getitem_2365: "f32[768][1]cuda:0" = _foreach_sqrt_1[145] + getitem_2366: "f32[768][1]cuda:0" = _foreach_sqrt_1[146] + getitem_2367: "f32[768][1]cuda:0" = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt) + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, 
getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = 
getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = 
getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None + getitem_2368: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_1[0] + getitem_2369: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_1[1] + getitem_2370: "f32[768][1]cuda:0" = _foreach_div_1[2] + getitem_2371: "f32[768][1]cuda:0" = _foreach_div_1[3] + getitem_2372: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[4] + getitem_2373: "f32[2304][1]cuda:0" = _foreach_div_1[5] + getitem_2374: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[6] + getitem_2375: "f32[768][1]cuda:0" = _foreach_div_1[7] + getitem_2376: "f32[768][1]cuda:0" = _foreach_div_1[8] + getitem_2377: "f32[768][1]cuda:0" = _foreach_div_1[9] + getitem_2378: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[10] + getitem_2379: "f32[3072][1]cuda:0" = _foreach_div_1[11] + getitem_2380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[12] + getitem_2381: "f32[768][1]cuda:0" = _foreach_div_1[13] + getitem_2382: "f32[768][1]cuda:0" = _foreach_div_1[14] + getitem_2383: "f32[768][1]cuda:0" = _foreach_div_1[15] + getitem_2384: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[16] + getitem_2385: "f32[2304][1]cuda:0" = _foreach_div_1[17] + getitem_2386: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[18] + getitem_2387: "f32[768][1]cuda:0" = _foreach_div_1[19] + getitem_2388: "f32[768][1]cuda:0" = _foreach_div_1[20] + getitem_2389: "f32[768][1]cuda:0" = _foreach_div_1[21] + getitem_2390: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[22] + getitem_2391: "f32[3072][1]cuda:0" = _foreach_div_1[23] + getitem_2392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[24] + getitem_2393: "f32[768][1]cuda:0" = _foreach_div_1[25] + getitem_2394: "f32[768][1]cuda:0" = _foreach_div_1[26] + getitem_2395: "f32[768][1]cuda:0" = _foreach_div_1[27] + getitem_2396: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[28] + getitem_2397: "f32[2304][1]cuda:0" = _foreach_div_1[29] + getitem_2398: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[30] + getitem_2399: "f32[768][1]cuda:0" = _foreach_div_1[31] + getitem_2400: "f32[768][1]cuda:0" = _foreach_div_1[32] + getitem_2401: "f32[768][1]cuda:0" = _foreach_div_1[33] + getitem_2402: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[34] + getitem_2403: "f32[3072][1]cuda:0" = _foreach_div_1[35] + getitem_2404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[36] + getitem_2405: "f32[768][1]cuda:0" = _foreach_div_1[37] + getitem_2406: "f32[768][1]cuda:0" = _foreach_div_1[38] + getitem_2407: "f32[768][1]cuda:0" = _foreach_div_1[39] + getitem_2408: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[40] + getitem_2409: "f32[2304][1]cuda:0" = _foreach_div_1[41] + getitem_2410: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[42] + getitem_2411: "f32[768][1]cuda:0" = _foreach_div_1[43] + getitem_2412: "f32[768][1]cuda:0" = _foreach_div_1[44] + getitem_2413: "f32[768][1]cuda:0" = _foreach_div_1[45] + getitem_2414: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[46] + getitem_2415: "f32[3072][1]cuda:0" = _foreach_div_1[47] + getitem_2416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[48] + getitem_2417: "f32[768][1]cuda:0" = _foreach_div_1[49] + getitem_2418: "f32[768][1]cuda:0" = _foreach_div_1[50] + getitem_2419: "f32[768][1]cuda:0" = 
_foreach_div_1[51] + getitem_2420: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[52] + getitem_2421: "f32[2304][1]cuda:0" = _foreach_div_1[53] + getitem_2422: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[54] + getitem_2423: "f32[768][1]cuda:0" = _foreach_div_1[55] + getitem_2424: "f32[768][1]cuda:0" = _foreach_div_1[56] + getitem_2425: "f32[768][1]cuda:0" = _foreach_div_1[57] + getitem_2426: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[58] + getitem_2427: "f32[3072][1]cuda:0" = _foreach_div_1[59] + getitem_2428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[60] + getitem_2429: "f32[768][1]cuda:0" = _foreach_div_1[61] + getitem_2430: "f32[768][1]cuda:0" = _foreach_div_1[62] + getitem_2431: "f32[768][1]cuda:0" = _foreach_div_1[63] + getitem_2432: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[64] + getitem_2433: "f32[2304][1]cuda:0" = _foreach_div_1[65] + getitem_2434: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[66] + getitem_2435: "f32[768][1]cuda:0" = _foreach_div_1[67] + getitem_2436: "f32[768][1]cuda:0" = _foreach_div_1[68] + getitem_2437: "f32[768][1]cuda:0" = _foreach_div_1[69] + getitem_2438: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[70] + getitem_2439: "f32[3072][1]cuda:0" = _foreach_div_1[71] + getitem_2440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[72] + getitem_2441: "f32[768][1]cuda:0" = _foreach_div_1[73] + getitem_2442: "f32[768][1]cuda:0" = _foreach_div_1[74] + getitem_2443: "f32[768][1]cuda:0" = _foreach_div_1[75] + getitem_2444: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[76] + getitem_2445: "f32[2304][1]cuda:0" = _foreach_div_1[77] + getitem_2446: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[78] + getitem_2447: "f32[768][1]cuda:0" = _foreach_div_1[79] + getitem_2448: "f32[768][1]cuda:0" = _foreach_div_1[80] + getitem_2449: "f32[768][1]cuda:0" = _foreach_div_1[81] + getitem_2450: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[82] + getitem_2451: "f32[3072][1]cuda:0" = _foreach_div_1[83] + getitem_2452: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[84] + getitem_2453: "f32[768][1]cuda:0" = _foreach_div_1[85] + getitem_2454: "f32[768][1]cuda:0" = _foreach_div_1[86] + getitem_2455: "f32[768][1]cuda:0" = _foreach_div_1[87] + getitem_2456: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[88] + getitem_2457: "f32[2304][1]cuda:0" = _foreach_div_1[89] + getitem_2458: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[90] + getitem_2459: "f32[768][1]cuda:0" = _foreach_div_1[91] + getitem_2460: "f32[768][1]cuda:0" = _foreach_div_1[92] + getitem_2461: "f32[768][1]cuda:0" = _foreach_div_1[93] + getitem_2462: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[94] + getitem_2463: "f32[3072][1]cuda:0" = _foreach_div_1[95] + getitem_2464: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[96] + getitem_2465: "f32[768][1]cuda:0" = _foreach_div_1[97] + getitem_2466: "f32[768][1]cuda:0" = _foreach_div_1[98] + getitem_2467: "f32[768][1]cuda:0" = _foreach_div_1[99] + getitem_2468: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[100] + getitem_2469: "f32[2304][1]cuda:0" = _foreach_div_1[101] + getitem_2470: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[102] + getitem_2471: "f32[768][1]cuda:0" = _foreach_div_1[103] + getitem_2472: "f32[768][1]cuda:0" = _foreach_div_1[104] + getitem_2473: "f32[768][1]cuda:0" = _foreach_div_1[105] + getitem_2474: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[106] + getitem_2475: "f32[3072][1]cuda:0" = _foreach_div_1[107] + getitem_2476: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[108] + getitem_2477: "f32[768][1]cuda:0" = 
_foreach_div_1[109] + getitem_2478: "f32[768][1]cuda:0" = _foreach_div_1[110] + getitem_2479: "f32[768][1]cuda:0" = _foreach_div_1[111] + getitem_2480: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[112] + getitem_2481: "f32[2304][1]cuda:0" = _foreach_div_1[113] + getitem_2482: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[114] + getitem_2483: "f32[768][1]cuda:0" = _foreach_div_1[115] + getitem_2484: "f32[768][1]cuda:0" = _foreach_div_1[116] + getitem_2485: "f32[768][1]cuda:0" = _foreach_div_1[117] + getitem_2486: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[118] + getitem_2487: "f32[3072][1]cuda:0" = _foreach_div_1[119] + getitem_2488: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[120] + getitem_2489: "f32[768][1]cuda:0" = _foreach_div_1[121] + getitem_2490: "f32[768][1]cuda:0" = _foreach_div_1[122] + getitem_2491: "f32[768][1]cuda:0" = _foreach_div_1[123] + getitem_2492: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[124] + getitem_2493: "f32[2304][1]cuda:0" = _foreach_div_1[125] + getitem_2494: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[126] + getitem_2495: "f32[768][1]cuda:0" = _foreach_div_1[127] + getitem_2496: "f32[768][1]cuda:0" = _foreach_div_1[128] + getitem_2497: "f32[768][1]cuda:0" = _foreach_div_1[129] + getitem_2498: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[130] + getitem_2499: "f32[3072][1]cuda:0" = _foreach_div_1[131] + getitem_2500: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[132] + getitem_2501: "f32[768][1]cuda:0" = _foreach_div_1[133] + getitem_2502: "f32[768][1]cuda:0" = _foreach_div_1[134] + getitem_2503: "f32[768][1]cuda:0" = _foreach_div_1[135] + getitem_2504: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[136] + getitem_2505: "f32[2304][1]cuda:0" = _foreach_div_1[137] + getitem_2506: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[138] + getitem_2507: "f32[768][1]cuda:0" = _foreach_div_1[139] + getitem_2508: "f32[768][1]cuda:0" = _foreach_div_1[140] + getitem_2509: "f32[768][1]cuda:0" = _foreach_div_1[141] + getitem_2510: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[142] + getitem_2511: "f32[3072][1]cuda:0" = _foreach_div_1[143] + getitem_2512: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[144] + getitem_2513: "f32[768][1]cuda:0" = _foreach_div_1[145] + getitem_2514: "f32[768][1]cuda:0" = _foreach_div_1[146] + getitem_2515: "f32[768][1]cuda:0" = _foreach_div_1[147]; _foreach_div_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps) + _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, 
getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None + getitem_2516: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_3[0] + getitem_2517: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_3[1] + getitem_2518: "f32[768][1]cuda:0" = _foreach_add_3[2] + getitem_2519: "f32[768][1]cuda:0" = 
_foreach_add_3[3] + getitem_2520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[4] + getitem_2521: "f32[2304][1]cuda:0" = _foreach_add_3[5] + getitem_2522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[6] + getitem_2523: "f32[768][1]cuda:0" = _foreach_add_3[7] + getitem_2524: "f32[768][1]cuda:0" = _foreach_add_3[8] + getitem_2525: "f32[768][1]cuda:0" = _foreach_add_3[9] + getitem_2526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[10] + getitem_2527: "f32[3072][1]cuda:0" = _foreach_add_3[11] + getitem_2528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[12] + getitem_2529: "f32[768][1]cuda:0" = _foreach_add_3[13] + getitem_2530: "f32[768][1]cuda:0" = _foreach_add_3[14] + getitem_2531: "f32[768][1]cuda:0" = _foreach_add_3[15] + getitem_2532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[16] + getitem_2533: "f32[2304][1]cuda:0" = _foreach_add_3[17] + getitem_2534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[18] + getitem_2535: "f32[768][1]cuda:0" = _foreach_add_3[19] + getitem_2536: "f32[768][1]cuda:0" = _foreach_add_3[20] + getitem_2537: "f32[768][1]cuda:0" = _foreach_add_3[21] + getitem_2538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[22] + getitem_2539: "f32[3072][1]cuda:0" = _foreach_add_3[23] + getitem_2540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[24] + getitem_2541: "f32[768][1]cuda:0" = _foreach_add_3[25] + getitem_2542: "f32[768][1]cuda:0" = _foreach_add_3[26] + getitem_2543: "f32[768][1]cuda:0" = _foreach_add_3[27] + getitem_2544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[28] + getitem_2545: "f32[2304][1]cuda:0" = _foreach_add_3[29] + getitem_2546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[30] + getitem_2547: "f32[768][1]cuda:0" = _foreach_add_3[31] + getitem_2548: "f32[768][1]cuda:0" = _foreach_add_3[32] + getitem_2549: "f32[768][1]cuda:0" = _foreach_add_3[33] + getitem_2550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[34] + getitem_2551: "f32[3072][1]cuda:0" = _foreach_add_3[35] + getitem_2552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[36] + getitem_2553: "f32[768][1]cuda:0" = _foreach_add_3[37] + getitem_2554: "f32[768][1]cuda:0" = _foreach_add_3[38] + getitem_2555: "f32[768][1]cuda:0" = _foreach_add_3[39] + getitem_2556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[40] + getitem_2557: "f32[2304][1]cuda:0" = _foreach_add_3[41] + getitem_2558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[42] + getitem_2559: "f32[768][1]cuda:0" = _foreach_add_3[43] + getitem_2560: "f32[768][1]cuda:0" = _foreach_add_3[44] + getitem_2561: "f32[768][1]cuda:0" = _foreach_add_3[45] + getitem_2562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[46] + getitem_2563: "f32[3072][1]cuda:0" = _foreach_add_3[47] + getitem_2564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[48] + getitem_2565: "f32[768][1]cuda:0" = _foreach_add_3[49] + getitem_2566: "f32[768][1]cuda:0" = _foreach_add_3[50] + getitem_2567: "f32[768][1]cuda:0" = _foreach_add_3[51] + getitem_2568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[52] + getitem_2569: "f32[2304][1]cuda:0" = _foreach_add_3[53] + getitem_2570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[54] + getitem_2571: "f32[768][1]cuda:0" = _foreach_add_3[55] + getitem_2572: "f32[768][1]cuda:0" = _foreach_add_3[56] + getitem_2573: "f32[768][1]cuda:0" = _foreach_add_3[57] + getitem_2574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[58] + getitem_2575: "f32[3072][1]cuda:0" = _foreach_add_3[59] + getitem_2576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[60] + getitem_2577: "f32[768][1]cuda:0" = _foreach_add_3[61] + 
getitem_2578: "f32[768][1]cuda:0" = _foreach_add_3[62] + getitem_2579: "f32[768][1]cuda:0" = _foreach_add_3[63] + getitem_2580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[64] + getitem_2581: "f32[2304][1]cuda:0" = _foreach_add_3[65] + getitem_2582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[66] + getitem_2583: "f32[768][1]cuda:0" = _foreach_add_3[67] + getitem_2584: "f32[768][1]cuda:0" = _foreach_add_3[68] + getitem_2585: "f32[768][1]cuda:0" = _foreach_add_3[69] + getitem_2586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[70] + getitem_2587: "f32[3072][1]cuda:0" = _foreach_add_3[71] + getitem_2588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[72] + getitem_2589: "f32[768][1]cuda:0" = _foreach_add_3[73] + getitem_2590: "f32[768][1]cuda:0" = _foreach_add_3[74] + getitem_2591: "f32[768][1]cuda:0" = _foreach_add_3[75] + getitem_2592: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[76] + getitem_2593: "f32[2304][1]cuda:0" = _foreach_add_3[77] + getitem_2594: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[78] + getitem_2595: "f32[768][1]cuda:0" = _foreach_add_3[79] + getitem_2596: "f32[768][1]cuda:0" = _foreach_add_3[80] + getitem_2597: "f32[768][1]cuda:0" = _foreach_add_3[81] + getitem_2598: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[82] + getitem_2599: "f32[3072][1]cuda:0" = _foreach_add_3[83] + getitem_2600: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[84] + getitem_2601: "f32[768][1]cuda:0" = _foreach_add_3[85] + getitem_2602: "f32[768][1]cuda:0" = _foreach_add_3[86] + getitem_2603: "f32[768][1]cuda:0" = _foreach_add_3[87] + getitem_2604: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[88] + getitem_2605: "f32[2304][1]cuda:0" = _foreach_add_3[89] + getitem_2606: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[90] + getitem_2607: "f32[768][1]cuda:0" = _foreach_add_3[91] + getitem_2608: "f32[768][1]cuda:0" = _foreach_add_3[92] + getitem_2609: "f32[768][1]cuda:0" = _foreach_add_3[93] + getitem_2610: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[94] + getitem_2611: "f32[3072][1]cuda:0" = _foreach_add_3[95] + getitem_2612: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[96] + getitem_2613: "f32[768][1]cuda:0" = _foreach_add_3[97] + getitem_2614: "f32[768][1]cuda:0" = _foreach_add_3[98] + getitem_2615: "f32[768][1]cuda:0" = _foreach_add_3[99] + getitem_2616: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[100] + getitem_2617: "f32[2304][1]cuda:0" = _foreach_add_3[101] + getitem_2618: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[102] + getitem_2619: "f32[768][1]cuda:0" = _foreach_add_3[103] + getitem_2620: "f32[768][1]cuda:0" = _foreach_add_3[104] + getitem_2621: "f32[768][1]cuda:0" = _foreach_add_3[105] + getitem_2622: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[106] + getitem_2623: "f32[3072][1]cuda:0" = _foreach_add_3[107] + getitem_2624: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[108] + getitem_2625: "f32[768][1]cuda:0" = _foreach_add_3[109] + getitem_2626: "f32[768][1]cuda:0" = _foreach_add_3[110] + getitem_2627: "f32[768][1]cuda:0" = _foreach_add_3[111] + getitem_2628: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[112] + getitem_2629: "f32[2304][1]cuda:0" = _foreach_add_3[113] + getitem_2630: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[114] + getitem_2631: "f32[768][1]cuda:0" = _foreach_add_3[115] + getitem_2632: "f32[768][1]cuda:0" = _foreach_add_3[116] + getitem_2633: "f32[768][1]cuda:0" = _foreach_add_3[117] + getitem_2634: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[118] + getitem_2635: "f32[3072][1]cuda:0" = _foreach_add_3[119] + 
getitem_2636: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[120] + getitem_2637: "f32[768][1]cuda:0" = _foreach_add_3[121] + getitem_2638: "f32[768][1]cuda:0" = _foreach_add_3[122] + getitem_2639: "f32[768][1]cuda:0" = _foreach_add_3[123] + getitem_2640: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[124] + getitem_2641: "f32[2304][1]cuda:0" = _foreach_add_3[125] + getitem_2642: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[126] + getitem_2643: "f32[768][1]cuda:0" = _foreach_add_3[127] + getitem_2644: "f32[768][1]cuda:0" = _foreach_add_3[128] + getitem_2645: "f32[768][1]cuda:0" = _foreach_add_3[129] + getitem_2646: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[130] + getitem_2647: "f32[3072][1]cuda:0" = _foreach_add_3[131] + getitem_2648: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[132] + getitem_2649: "f32[768][1]cuda:0" = _foreach_add_3[133] + getitem_2650: "f32[768][1]cuda:0" = _foreach_add_3[134] + getitem_2651: "f32[768][1]cuda:0" = _foreach_add_3[135] + getitem_2652: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[136] + getitem_2653: "f32[2304][1]cuda:0" = _foreach_add_3[137] + getitem_2654: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[138] + getitem_2655: "f32[768][1]cuda:0" = _foreach_add_3[139] + getitem_2656: "f32[768][1]cuda:0" = _foreach_add_3[140] + getitem_2657: "f32[768][1]cuda:0" = _foreach_add_3[141] + getitem_2658: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[142] + getitem_2659: "f32[3072][1]cuda:0" = _foreach_add_3[143] + getitem_2660: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[144] + getitem_2661: "f32[768][1]cuda:0" = _foreach_add_3[145] + getitem_2662: "f32[768][1]cuda:0" = _foreach_add_3[146] + getitem_2663: "f32[768][1]cuda:0" = _foreach_add_3[147]; _foreach_add_3 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size) + _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, 
getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = 
getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None + getitem_2664: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_2[0] + getitem_2665: "f32[1024, 768][768, 1]cuda:0" 
= _foreach_div_2[1] + getitem_2666: "f32[768][1]cuda:0" = _foreach_div_2[2] + getitem_2667: "f32[768][1]cuda:0" = _foreach_div_2[3] + getitem_2668: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[4] + getitem_2669: "f32[2304][1]cuda:0" = _foreach_div_2[5] + getitem_2670: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[6] + getitem_2671: "f32[768][1]cuda:0" = _foreach_div_2[7] + getitem_2672: "f32[768][1]cuda:0" = _foreach_div_2[8] + getitem_2673: "f32[768][1]cuda:0" = _foreach_div_2[9] + getitem_2674: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[10] + getitem_2675: "f32[3072][1]cuda:0" = _foreach_div_2[11] + getitem_2676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[12] + getitem_2677: "f32[768][1]cuda:0" = _foreach_div_2[13] + getitem_2678: "f32[768][1]cuda:0" = _foreach_div_2[14] + getitem_2679: "f32[768][1]cuda:0" = _foreach_div_2[15] + getitem_2680: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[16] + getitem_2681: "f32[2304][1]cuda:0" = _foreach_div_2[17] + getitem_2682: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[18] + getitem_2683: "f32[768][1]cuda:0" = _foreach_div_2[19] + getitem_2684: "f32[768][1]cuda:0" = _foreach_div_2[20] + getitem_2685: "f32[768][1]cuda:0" = _foreach_div_2[21] + getitem_2686: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[22] + getitem_2687: "f32[3072][1]cuda:0" = _foreach_div_2[23] + getitem_2688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[24] + getitem_2689: "f32[768][1]cuda:0" = _foreach_div_2[25] + getitem_2690: "f32[768][1]cuda:0" = _foreach_div_2[26] + getitem_2691: "f32[768][1]cuda:0" = _foreach_div_2[27] + getitem_2692: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[28] + getitem_2693: "f32[2304][1]cuda:0" = _foreach_div_2[29] + getitem_2694: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[30] + getitem_2695: "f32[768][1]cuda:0" = _foreach_div_2[31] + getitem_2696: "f32[768][1]cuda:0" = _foreach_div_2[32] + getitem_2697: "f32[768][1]cuda:0" = _foreach_div_2[33] + getitem_2698: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[34] + getitem_2699: "f32[3072][1]cuda:0" = _foreach_div_2[35] + getitem_2700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[36] + getitem_2701: "f32[768][1]cuda:0" = _foreach_div_2[37] + getitem_2702: "f32[768][1]cuda:0" = _foreach_div_2[38] + getitem_2703: "f32[768][1]cuda:0" = _foreach_div_2[39] + getitem_2704: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[40] + getitem_2705: "f32[2304][1]cuda:0" = _foreach_div_2[41] + getitem_2706: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[42] + getitem_2707: "f32[768][1]cuda:0" = _foreach_div_2[43] + getitem_2708: "f32[768][1]cuda:0" = _foreach_div_2[44] + getitem_2709: "f32[768][1]cuda:0" = _foreach_div_2[45] + getitem_2710: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[46] + getitem_2711: "f32[3072][1]cuda:0" = _foreach_div_2[47] + getitem_2712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[48] + getitem_2713: "f32[768][1]cuda:0" = _foreach_div_2[49] + getitem_2714: "f32[768][1]cuda:0" = _foreach_div_2[50] + getitem_2715: "f32[768][1]cuda:0" = _foreach_div_2[51] + getitem_2716: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[52] + getitem_2717: "f32[2304][1]cuda:0" = _foreach_div_2[53] + getitem_2718: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[54] + getitem_2719: "f32[768][1]cuda:0" = _foreach_div_2[55] + getitem_2720: "f32[768][1]cuda:0" = _foreach_div_2[56] + getitem_2721: "f32[768][1]cuda:0" = _foreach_div_2[57] + getitem_2722: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[58] + getitem_2723: "f32[3072][1]cuda:0" = _foreach_div_2[59] + getitem_2724: 
"f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[60] + getitem_2725: "f32[768][1]cuda:0" = _foreach_div_2[61] + getitem_2726: "f32[768][1]cuda:0" = _foreach_div_2[62] + getitem_2727: "f32[768][1]cuda:0" = _foreach_div_2[63] + getitem_2728: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[64] + getitem_2729: "f32[2304][1]cuda:0" = _foreach_div_2[65] + getitem_2730: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[66] + getitem_2731: "f32[768][1]cuda:0" = _foreach_div_2[67] + getitem_2732: "f32[768][1]cuda:0" = _foreach_div_2[68] + getitem_2733: "f32[768][1]cuda:0" = _foreach_div_2[69] + getitem_2734: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[70] + getitem_2735: "f32[3072][1]cuda:0" = _foreach_div_2[71] + getitem_2736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[72] + getitem_2737: "f32[768][1]cuda:0" = _foreach_div_2[73] + getitem_2738: "f32[768][1]cuda:0" = _foreach_div_2[74] + getitem_2739: "f32[768][1]cuda:0" = _foreach_div_2[75] + getitem_2740: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[76] + getitem_2741: "f32[2304][1]cuda:0" = _foreach_div_2[77] + getitem_2742: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[78] + getitem_2743: "f32[768][1]cuda:0" = _foreach_div_2[79] + getitem_2744: "f32[768][1]cuda:0" = _foreach_div_2[80] + getitem_2745: "f32[768][1]cuda:0" = _foreach_div_2[81] + getitem_2746: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[82] + getitem_2747: "f32[3072][1]cuda:0" = _foreach_div_2[83] + getitem_2748: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[84] + getitem_2749: "f32[768][1]cuda:0" = _foreach_div_2[85] + getitem_2750: "f32[768][1]cuda:0" = _foreach_div_2[86] + getitem_2751: "f32[768][1]cuda:0" = _foreach_div_2[87] + getitem_2752: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[88] + getitem_2753: "f32[2304][1]cuda:0" = _foreach_div_2[89] + getitem_2754: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[90] + getitem_2755: "f32[768][1]cuda:0" = _foreach_div_2[91] + getitem_2756: "f32[768][1]cuda:0" = _foreach_div_2[92] + getitem_2757: "f32[768][1]cuda:0" = _foreach_div_2[93] + getitem_2758: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[94] + getitem_2759: "f32[3072][1]cuda:0" = _foreach_div_2[95] + getitem_2760: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[96] + getitem_2761: "f32[768][1]cuda:0" = _foreach_div_2[97] + getitem_2762: "f32[768][1]cuda:0" = _foreach_div_2[98] + getitem_2763: "f32[768][1]cuda:0" = _foreach_div_2[99] + getitem_2764: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[100] + getitem_2765: "f32[2304][1]cuda:0" = _foreach_div_2[101] + getitem_2766: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[102] + getitem_2767: "f32[768][1]cuda:0" = _foreach_div_2[103] + getitem_2768: "f32[768][1]cuda:0" = _foreach_div_2[104] + getitem_2769: "f32[768][1]cuda:0" = _foreach_div_2[105] + getitem_2770: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[106] + getitem_2771: "f32[3072][1]cuda:0" = _foreach_div_2[107] + getitem_2772: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[108] + getitem_2773: "f32[768][1]cuda:0" = _foreach_div_2[109] + getitem_2774: "f32[768][1]cuda:0" = _foreach_div_2[110] + getitem_2775: "f32[768][1]cuda:0" = _foreach_div_2[111] + getitem_2776: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[112] + getitem_2777: "f32[2304][1]cuda:0" = _foreach_div_2[113] + getitem_2778: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[114] + getitem_2779: "f32[768][1]cuda:0" = _foreach_div_2[115] + getitem_2780: "f32[768][1]cuda:0" = _foreach_div_2[116] + getitem_2781: "f32[768][1]cuda:0" = _foreach_div_2[117] + getitem_2782: "f32[3072, 
768][768, 1]cuda:0" = _foreach_div_2[118] + getitem_2783: "f32[3072][1]cuda:0" = _foreach_div_2[119] + getitem_2784: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[120] + getitem_2785: "f32[768][1]cuda:0" = _foreach_div_2[121] + getitem_2786: "f32[768][1]cuda:0" = _foreach_div_2[122] + getitem_2787: "f32[768][1]cuda:0" = _foreach_div_2[123] + getitem_2788: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[124] + getitem_2789: "f32[2304][1]cuda:0" = _foreach_div_2[125] + getitem_2790: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[126] + getitem_2791: "f32[768][1]cuda:0" = _foreach_div_2[127] + getitem_2792: "f32[768][1]cuda:0" = _foreach_div_2[128] + getitem_2793: "f32[768][1]cuda:0" = _foreach_div_2[129] + getitem_2794: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[130] + getitem_2795: "f32[3072][1]cuda:0" = _foreach_div_2[131] + getitem_2796: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[132] + getitem_2797: "f32[768][1]cuda:0" = _foreach_div_2[133] + getitem_2798: "f32[768][1]cuda:0" = _foreach_div_2[134] + getitem_2799: "f32[768][1]cuda:0" = _foreach_div_2[135] + getitem_2800: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[136] + getitem_2801: "f32[2304][1]cuda:0" = _foreach_div_2[137] + getitem_2802: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[138] + getitem_2803: "f32[768][1]cuda:0" = _foreach_div_2[139] + getitem_2804: "f32[768][1]cuda:0" = _foreach_div_2[140] + getitem_2805: "f32[768][1]cuda:0" = _foreach_div_2[141] + getitem_2806: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[142] + getitem_2807: "f32[3072][1]cuda:0" = _foreach_div_2[143] + getitem_2808: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[144] + getitem_2809: "f32[768][1]cuda:0" = _foreach_div_2[145] + getitem_2810: "f32[768][1]cuda:0" = _foreach_div_2[146] + getitem_2811: "f32[768][1]cuda:0" = _foreach_div_2[147]; _foreach_div_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt) + _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, 
getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = 
getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None + getitem_2812: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_3[0] + getitem_2813: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_3[1] + getitem_2814: "f32[768][1]cuda:0" = _foreach_div_3[2] + getitem_2815: "f32[768][1]cuda:0" = _foreach_div_3[3] + getitem_2816: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[4] + getitem_2817: "f32[2304][1]cuda:0" = _foreach_div_3[5] + getitem_2818: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[6] + getitem_2819: "f32[768][1]cuda:0" = _foreach_div_3[7] + getitem_2820: "f32[768][1]cuda:0" = _foreach_div_3[8] + getitem_2821: "f32[768][1]cuda:0" = _foreach_div_3[9] + getitem_2822: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[10] + getitem_2823: "f32[3072][1]cuda:0" = _foreach_div_3[11] + getitem_2824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[12] + getitem_2825: "f32[768][1]cuda:0" = _foreach_div_3[13] + getitem_2826: "f32[768][1]cuda:0" = _foreach_div_3[14] + getitem_2827: "f32[768][1]cuda:0" = _foreach_div_3[15] + getitem_2828: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[16] + getitem_2829: "f32[2304][1]cuda:0" = _foreach_div_3[17] + getitem_2830: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[18] + getitem_2831: "f32[768][1]cuda:0" = _foreach_div_3[19] + getitem_2832: "f32[768][1]cuda:0" = _foreach_div_3[20] + getitem_2833: "f32[768][1]cuda:0" = _foreach_div_3[21] + getitem_2834: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[22] + getitem_2835: "f32[3072][1]cuda:0" = _foreach_div_3[23] + getitem_2836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[24] + getitem_2837: "f32[768][1]cuda:0" = _foreach_div_3[25] + getitem_2838: "f32[768][1]cuda:0" = _foreach_div_3[26] + getitem_2839: "f32[768][1]cuda:0" = _foreach_div_3[27] + getitem_2840: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[28] + getitem_2841: "f32[2304][1]cuda:0" = _foreach_div_3[29] + getitem_2842: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[30] + getitem_2843: "f32[768][1]cuda:0" = _foreach_div_3[31] + getitem_2844: "f32[768][1]cuda:0" = _foreach_div_3[32] + getitem_2845: "f32[768][1]cuda:0" = _foreach_div_3[33] + getitem_2846: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[34] + getitem_2847: "f32[3072][1]cuda:0" = _foreach_div_3[35] + getitem_2848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[36] + getitem_2849: "f32[768][1]cuda:0" = _foreach_div_3[37] + getitem_2850: "f32[768][1]cuda:0" = 
_foreach_div_3[38] + getitem_2851: "f32[768][1]cuda:0" = _foreach_div_3[39] + getitem_2852: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[40] + getitem_2853: "f32[2304][1]cuda:0" = _foreach_div_3[41] + getitem_2854: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[42] + getitem_2855: "f32[768][1]cuda:0" = _foreach_div_3[43] + getitem_2856: "f32[768][1]cuda:0" = _foreach_div_3[44] + getitem_2857: "f32[768][1]cuda:0" = _foreach_div_3[45] + getitem_2858: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[46] + getitem_2859: "f32[3072][1]cuda:0" = _foreach_div_3[47] + getitem_2860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[48] + getitem_2861: "f32[768][1]cuda:0" = _foreach_div_3[49] + getitem_2862: "f32[768][1]cuda:0" = _foreach_div_3[50] + getitem_2863: "f32[768][1]cuda:0" = _foreach_div_3[51] + getitem_2864: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[52] + getitem_2865: "f32[2304][1]cuda:0" = _foreach_div_3[53] + getitem_2866: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[54] + getitem_2867: "f32[768][1]cuda:0" = _foreach_div_3[55] + getitem_2868: "f32[768][1]cuda:0" = _foreach_div_3[56] + getitem_2869: "f32[768][1]cuda:0" = _foreach_div_3[57] + getitem_2870: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[58] + getitem_2871: "f32[3072][1]cuda:0" = _foreach_div_3[59] + getitem_2872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[60] + getitem_2873: "f32[768][1]cuda:0" = _foreach_div_3[61] + getitem_2874: "f32[768][1]cuda:0" = _foreach_div_3[62] + getitem_2875: "f32[768][1]cuda:0" = _foreach_div_3[63] + getitem_2876: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[64] + getitem_2877: "f32[2304][1]cuda:0" = _foreach_div_3[65] + getitem_2878: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[66] + getitem_2879: "f32[768][1]cuda:0" = _foreach_div_3[67] + getitem_2880: "f32[768][1]cuda:0" = _foreach_div_3[68] + getitem_2881: "f32[768][1]cuda:0" = _foreach_div_3[69] + getitem_2882: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[70] + getitem_2883: "f32[3072][1]cuda:0" = _foreach_div_3[71] + getitem_2884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[72] + getitem_2885: "f32[768][1]cuda:0" = _foreach_div_3[73] + getitem_2886: "f32[768][1]cuda:0" = _foreach_div_3[74] + getitem_2887: "f32[768][1]cuda:0" = _foreach_div_3[75] + getitem_2888: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[76] + getitem_2889: "f32[2304][1]cuda:0" = _foreach_div_3[77] + getitem_2890: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[78] + getitem_2891: "f32[768][1]cuda:0" = _foreach_div_3[79] + getitem_2892: "f32[768][1]cuda:0" = _foreach_div_3[80] + getitem_2893: "f32[768][1]cuda:0" = _foreach_div_3[81] + getitem_2894: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[82] + getitem_2895: "f32[3072][1]cuda:0" = _foreach_div_3[83] + getitem_2896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[84] + getitem_2897: "f32[768][1]cuda:0" = _foreach_div_3[85] + getitem_2898: "f32[768][1]cuda:0" = _foreach_div_3[86] + getitem_2899: "f32[768][1]cuda:0" = _foreach_div_3[87] + getitem_2900: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[88] + getitem_2901: "f32[2304][1]cuda:0" = _foreach_div_3[89] + getitem_2902: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[90] + getitem_2903: "f32[768][1]cuda:0" = _foreach_div_3[91] + getitem_2904: "f32[768][1]cuda:0" = _foreach_div_3[92] + getitem_2905: "f32[768][1]cuda:0" = _foreach_div_3[93] + getitem_2906: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[94] + getitem_2907: "f32[3072][1]cuda:0" = _foreach_div_3[95] + getitem_2908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[96] 
+ getitem_2909: "f32[768][1]cuda:0" = _foreach_div_3[97] + getitem_2910: "f32[768][1]cuda:0" = _foreach_div_3[98] + getitem_2911: "f32[768][1]cuda:0" = _foreach_div_3[99] + getitem_2912: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[100] + getitem_2913: "f32[2304][1]cuda:0" = _foreach_div_3[101] + getitem_2914: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[102] + getitem_2915: "f32[768][1]cuda:0" = _foreach_div_3[103] + getitem_2916: "f32[768][1]cuda:0" = _foreach_div_3[104] + getitem_2917: "f32[768][1]cuda:0" = _foreach_div_3[105] + getitem_2918: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[106] + getitem_2919: "f32[3072][1]cuda:0" = _foreach_div_3[107] + getitem_2920: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[108] + getitem_2921: "f32[768][1]cuda:0" = _foreach_div_3[109] + getitem_2922: "f32[768][1]cuda:0" = _foreach_div_3[110] + getitem_2923: "f32[768][1]cuda:0" = _foreach_div_3[111] + getitem_2924: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[112] + getitem_2925: "f32[2304][1]cuda:0" = _foreach_div_3[113] + getitem_2926: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[114] + getitem_2927: "f32[768][1]cuda:0" = _foreach_div_3[115] + getitem_2928: "f32[768][1]cuda:0" = _foreach_div_3[116] + getitem_2929: "f32[768][1]cuda:0" = _foreach_div_3[117] + getitem_2930: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[118] + getitem_2931: "f32[3072][1]cuda:0" = _foreach_div_3[119] + getitem_2932: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[120] + getitem_2933: "f32[768][1]cuda:0" = _foreach_div_3[121] + getitem_2934: "f32[768][1]cuda:0" = _foreach_div_3[122] + getitem_2935: "f32[768][1]cuda:0" = _foreach_div_3[123] + getitem_2936: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[124] + getitem_2937: "f32[2304][1]cuda:0" = _foreach_div_3[125] + getitem_2938: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[126] + getitem_2939: "f32[768][1]cuda:0" = _foreach_div_3[127] + getitem_2940: "f32[768][1]cuda:0" = _foreach_div_3[128] + getitem_2941: "f32[768][1]cuda:0" = _foreach_div_3[129] + getitem_2942: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[130] + getitem_2943: "f32[3072][1]cuda:0" = _foreach_div_3[131] + getitem_2944: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[132] + getitem_2945: "f32[768][1]cuda:0" = _foreach_div_3[133] + getitem_2946: "f32[768][1]cuda:0" = _foreach_div_3[134] + getitem_2947: "f32[768][1]cuda:0" = _foreach_div_3[135] + getitem_2948: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[136] + getitem_2949: "f32[2304][1]cuda:0" = _foreach_div_3[137] + getitem_2950: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[138] + getitem_2951: "f32[768][1]cuda:0" = _foreach_div_3[139] + getitem_2952: "f32[768][1]cuda:0" = _foreach_div_3[140] + getitem_2953: "f32[768][1]cuda:0" = _foreach_div_3[141] + getitem_2954: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[142] + getitem_2955: "f32[3072][1]cuda:0" = _foreach_div_3[143] + getitem_2956: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[144] + getitem_2957: "f32[768][1]cuda:0" = _foreach_div_3[145] + getitem_2958: "f32[768][1]cuda:0" = _foreach_div_3[146] + getitem_2959: "f32[768][1]cuda:0" = _foreach_div_3[147]; _foreach_div_3 = None + _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, 
arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = 
getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None + getitem_2960: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_4[0] + getitem_2961: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_4[1] + getitem_2962: "f32[768][1]cuda:0" = _foreach_add_4[2] + getitem_2963: "f32[768][1]cuda:0" = _foreach_add_4[3] + getitem_2964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[4] + getitem_2965: "f32[2304][1]cuda:0" = _foreach_add_4[5] + getitem_2966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[6] + getitem_2967: "f32[768][1]cuda:0" = _foreach_add_4[7] + getitem_2968: "f32[768][1]cuda:0" = _foreach_add_4[8] + getitem_2969: "f32[768][1]cuda:0" = _foreach_add_4[9] + getitem_2970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[10] + getitem_2971: "f32[3072][1]cuda:0" = _foreach_add_4[11] + getitem_2972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[12] + getitem_2973: "f32[768][1]cuda:0" = _foreach_add_4[13] + getitem_2974: "f32[768][1]cuda:0" = _foreach_add_4[14] + getitem_2975: "f32[768][1]cuda:0" = _foreach_add_4[15] + getitem_2976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[16] + getitem_2977: "f32[2304][1]cuda:0" = _foreach_add_4[17] + getitem_2978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[18] + getitem_2979: "f32[768][1]cuda:0" = _foreach_add_4[19] + getitem_2980: "f32[768][1]cuda:0" = _foreach_add_4[20] + getitem_2981: "f32[768][1]cuda:0" = _foreach_add_4[21] + getitem_2982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[22] + getitem_2983: "f32[3072][1]cuda:0" = _foreach_add_4[23] + getitem_2984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[24] + getitem_2985: "f32[768][1]cuda:0" = _foreach_add_4[25] + getitem_2986: "f32[768][1]cuda:0" = _foreach_add_4[26] + getitem_2987: "f32[768][1]cuda:0" = _foreach_add_4[27] + getitem_2988: "f32[2304, 768][768, 1]cuda:0" = 
_foreach_add_4[28]
+ getitem_2989: "f32[2304][1]cuda:0" = _foreach_add_4[29]
+ getitem_2990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[30]
+ getitem_2991: "f32[768][1]cuda:0" = _foreach_add_4[31]
+ getitem_2992: "f32[768][1]cuda:0" = _foreach_add_4[32]
+ getitem_2993: "f32[768][1]cuda:0" = _foreach_add_4[33]
+ getitem_2994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[34]
+ getitem_2995: "f32[3072][1]cuda:0" = _foreach_add_4[35]
+ getitem_2996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[36]
+ getitem_2997: "f32[768][1]cuda:0" = _foreach_add_4[37]
+ getitem_2998: "f32[768][1]cuda:0" = _foreach_add_4[38]
+ getitem_2999: "f32[768][1]cuda:0" = _foreach_add_4[39]
+ getitem_3000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[40]
+ getitem_3001: "f32[2304][1]cuda:0" = _foreach_add_4[41]
+ getitem_3002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[42]
+ getitem_3003: "f32[768][1]cuda:0" = _foreach_add_4[43]
+ getitem_3004: "f32[768][1]cuda:0" = _foreach_add_4[44]
+ getitem_3005: "f32[768][1]cuda:0" = _foreach_add_4[45]
+ getitem_3006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[46]
+ getitem_3007: "f32[3072][1]cuda:0" = _foreach_add_4[47]
+ getitem_3008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[48]
+ getitem_3009: "f32[768][1]cuda:0" = _foreach_add_4[49]
+ getitem_3010: "f32[768][1]cuda:0" = _foreach_add_4[50]
+ getitem_3011: "f32[768][1]cuda:0" = _foreach_add_4[51]
+ getitem_3012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[52]
+ getitem_3013: "f32[2304][1]cuda:0" = _foreach_add_4[53]
+ getitem_3014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[54]
+ getitem_3015: "f32[768][1]cuda:0" = _foreach_add_4[55]
+ getitem_3016: "f32[768][1]cuda:0" = _foreach_add_4[56]
+ getitem_3017: "f32[768][1]cuda:0" = _foreach_add_4[57]
+ getitem_3018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[58]
+ getitem_3019: "f32[3072][1]cuda:0" = _foreach_add_4[59]
+ getitem_3020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[60]
+ getitem_3021: "f32[768][1]cuda:0" = _foreach_add_4[61]
+ getitem_3022: "f32[768][1]cuda:0" = _foreach_add_4[62]
+ getitem_3023: "f32[768][1]cuda:0" = _foreach_add_4[63]
+ getitem_3024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[64]
+ getitem_3025: "f32[2304][1]cuda:0" = _foreach_add_4[65]
+ getitem_3026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[66]
+ getitem_3027: "f32[768][1]cuda:0" = _foreach_add_4[67]
+ getitem_3028: "f32[768][1]cuda:0" = _foreach_add_4[68]
+ getitem_3029: "f32[768][1]cuda:0" = _foreach_add_4[69]
+ getitem_3030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[70]
+ getitem_3031: "f32[3072][1]cuda:0" = _foreach_add_4[71]
+ getitem_3032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[72]
+ getitem_3033: "f32[768][1]cuda:0" = _foreach_add_4[73]
+ getitem_3034: "f32[768][1]cuda:0" = _foreach_add_4[74]
+ getitem_3035: "f32[768][1]cuda:0" = _foreach_add_4[75]
+ getitem_3036: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[76]
+ getitem_3037: "f32[2304][1]cuda:0" = _foreach_add_4[77]
+ getitem_3038: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[78]
+ getitem_3039: "f32[768][1]cuda:0" = _foreach_add_4[79]
+ getitem_3040: "f32[768][1]cuda:0" = _foreach_add_4[80]
+ getitem_3041: "f32[768][1]cuda:0" = _foreach_add_4[81]
+ getitem_3042: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[82]
+ getitem_3043: "f32[3072][1]cuda:0" = _foreach_add_4[83]
+ getitem_3044: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[84]
+ getitem_3045: "f32[768][1]cuda:0" = _foreach_add_4[85]
+ getitem_3046: "f32[768][1]cuda:0" = _foreach_add_4[86]
+ getitem_3047: "f32[768][1]cuda:0" = _foreach_add_4[87]
+ getitem_3048: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[88]
+ getitem_3049: "f32[2304][1]cuda:0" = _foreach_add_4[89]
+ getitem_3050: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[90]
+ getitem_3051: "f32[768][1]cuda:0" = _foreach_add_4[91]
+ getitem_3052: "f32[768][1]cuda:0" = _foreach_add_4[92]
+ getitem_3053: "f32[768][1]cuda:0" = _foreach_add_4[93]
+ getitem_3054: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[94]
+ getitem_3055: "f32[3072][1]cuda:0" = _foreach_add_4[95]
+ getitem_3056: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[96]
+ getitem_3057: "f32[768][1]cuda:0" = _foreach_add_4[97]
+ getitem_3058: "f32[768][1]cuda:0" = _foreach_add_4[98]
+ getitem_3059: "f32[768][1]cuda:0" = _foreach_add_4[99]
+ getitem_3060: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[100]
+ getitem_3061: "f32[2304][1]cuda:0" = _foreach_add_4[101]
+ getitem_3062: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[102]
+ getitem_3063: "f32[768][1]cuda:0" = _foreach_add_4[103]
+ getitem_3064: "f32[768][1]cuda:0" = _foreach_add_4[104]
+ getitem_3065: "f32[768][1]cuda:0" = _foreach_add_4[105]
+ getitem_3066: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[106]
+ getitem_3067: "f32[3072][1]cuda:0" = _foreach_add_4[107]
+ getitem_3068: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[108]
+ getitem_3069: "f32[768][1]cuda:0" = _foreach_add_4[109]
+ getitem_3070: "f32[768][1]cuda:0" = _foreach_add_4[110]
+ getitem_3071: "f32[768][1]cuda:0" = _foreach_add_4[111]
+ getitem_3072: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[112]
+ getitem_3073: "f32[2304][1]cuda:0" = _foreach_add_4[113]
+ getitem_3074: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[114]
+ getitem_3075: "f32[768][1]cuda:0" = _foreach_add_4[115]
+ getitem_3076: "f32[768][1]cuda:0" = _foreach_add_4[116]
+ getitem_3077: "f32[768][1]cuda:0" = _foreach_add_4[117]
+ getitem_3078: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[118]
+ getitem_3079: "f32[3072][1]cuda:0" = _foreach_add_4[119]
+ getitem_3080: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[120]
+ getitem_3081: "f32[768][1]cuda:0" = _foreach_add_4[121]
+ getitem_3082: "f32[768][1]cuda:0" = _foreach_add_4[122]
+ getitem_3083: "f32[768][1]cuda:0" = _foreach_add_4[123]
+ getitem_3084: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[124]
+ getitem_3085: "f32[2304][1]cuda:0" = _foreach_add_4[125]
+ getitem_3086: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[126]
+ getitem_3087: "f32[768][1]cuda:0" = _foreach_add_4[127]
+ getitem_3088: "f32[768][1]cuda:0" = _foreach_add_4[128]
+ getitem_3089: "f32[768][1]cuda:0" = _foreach_add_4[129]
+ getitem_3090: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[130]
+ getitem_3091: "f32[3072][1]cuda:0" = _foreach_add_4[131]
+ getitem_3092: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[132]
+ getitem_3093: "f32[768][1]cuda:0" = _foreach_add_4[133]
+ getitem_3094: "f32[768][1]cuda:0" = _foreach_add_4[134]
+ getitem_3095: "f32[768][1]cuda:0" = _foreach_add_4[135]
+ getitem_3096: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[136]
+ getitem_3097: "f32[2304][1]cuda:0" = _foreach_add_4[137]
+ getitem_3098: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[138]
+ getitem_3099: "f32[768][1]cuda:0" = _foreach_add_4[139]
+ getitem_3100: "f32[768][1]cuda:0" = _foreach_add_4[140]
+ getitem_3101: "f32[768][1]cuda:0" = _foreach_add_4[141]
+ getitem_3102: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[142]
+ getitem_3103: "f32[3072][1]cuda:0" = _foreach_add_4[143]
+ getitem_3104: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[144]
+ getitem_3105: "f32[768][1]cuda:0" = _foreach_add_4[145]
+ getitem_3106: "f32[768][1]cuda:0" = _foreach_add_4[146]
+ getitem_3107: "f32[768][1]cuda:0" = _foreach_add_4[147]; _foreach_add_4 = None
+ copy_: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None
+ copy__1: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None
+ copy__2: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None
+ copy__3: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg3_1, getitem_2963); arg3_1 = getitem_2963 = copy__3 = None
+ copy__4: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None
+ copy__5: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None
+ copy__6: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None
+ copy__7: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None
+ copy__8: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None
+ copy__9: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None
+ copy__10: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None
+ copy__11: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None
+ copy__12: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None
+ copy__13: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None
+ copy__14: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None
+ copy__15: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None
+ copy__16: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None
+ copy__17: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None
+ copy__18: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None
+ copy__19: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None
+ copy__20: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None
+ copy__21: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None
+ copy__22: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None
+ copy__23: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None
+ copy__24: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None
+ copy__25: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None
+ copy__26: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None
+ copy__27: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None
+ copy__28: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None
+ copy__29: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None
+ copy__30: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None
+ copy__31: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None
+ copy__32: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None
+ copy__33: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None
+ copy__34: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None
+ copy__35: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None
+ copy__36: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None
+ copy__37: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None
+ copy__38: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None
+ copy__39: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None
+ copy__40: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None
+ copy__41: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None
+ copy__42: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None
+ copy__43: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None
+ copy__44: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None
+ copy__45: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None
+ copy__46: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None
+ copy__47: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None
+ copy__48: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None
+ copy__49: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None
+ copy__50: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None
+ copy__51: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None
+ copy__52: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None
+ copy__53: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None
+ copy__54: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None
+ copy__55: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None
+ copy__56: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None
+ copy__57: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None
+ copy__58: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None
+ copy__59: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None
+ copy__60: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None
+ copy__61: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None
+ copy__62: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None
+ copy__63: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None
+ copy__64: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None
+ copy__65: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None
+ copy__66: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None
+ copy__67: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None
+ copy__68: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None
+ copy__69: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None
+ copy__70: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None
+ copy__71: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None
+ copy__72: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None
+ copy__73: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None
+ copy__74: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None
+ copy__75: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None
+ copy__76: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None
+ copy__77: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None
+ copy__78: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None
+ copy__79: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None
+ copy__80: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None
+ copy__81: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None
+ copy__82: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None
+ copy__83: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None
+ copy__84: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None
+ copy__85: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None
+ copy__86: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None
+ copy__87: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None
+ copy__88: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None
+ copy__89: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None
+ copy__90: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None
+ copy__91: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None
+ copy__92: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None
+ copy__93: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None
+ copy__94: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None
+ copy__95: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None
+ copy__96: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None
+ copy__97: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None
+ copy__98: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None
+ copy__99: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None
+ copy__100: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None
+ copy__101: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None
+ copy__102: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None
+ copy__103: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None
+ copy__104: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None
+ copy__105: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None
+ copy__106: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None
+ copy__107: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None
+ copy__108: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None
+ copy__109: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None
+ copy__110: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None
+ copy__111: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None
+ copy__112: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None
+ copy__113: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None
+ copy__114: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None
+ copy__115: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None
+ copy__116: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None
+ copy__117: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None
+ copy__118: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None
+ copy__119: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None
+ copy__120: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None
+ copy__121: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None
+ copy__122: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None
+ copy__123: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None
+ copy__124: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None
+ copy__125: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None
+ copy__126: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None
+ copy__127: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None
+ copy__128: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None
+ copy__129: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None
+ copy__130: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None
+ copy__131: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None
+ copy__132: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None
+ copy__133: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None
+ copy__134: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None
+ copy__135: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None
+ copy__136: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None
+ copy__137: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None
+ copy__138: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None
+ copy__139: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None
+ copy__140: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None
+ copy__141: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None
+ copy__142: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None
+ copy__143: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None
+ copy__144: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None
+ copy__145: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None
+ copy__146: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None
+ copy__147: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ copy__148: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ copy__149: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ copy__150: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
+ copy__151: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None
+ copy__152: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None
+ copy__153: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None
+ copy__154: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None
+ copy__155: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None
+ copy__156: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None
+ copy__157: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None
+ copy__158: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None
+ copy__159: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None
+ copy__160: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None
+ copy__161: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None
+ copy__162: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None
+ copy__163: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None
+ copy__164: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None
+ copy__165: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None
+ copy__166: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None
+ copy__167: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None
+ copy__168: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None
+ copy__169: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None
+ copy__170: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None
+ copy__171: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None
+ copy__172: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None
+ copy__173: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None
+ copy__174: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None
+ copy__175: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None
+ copy__176: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None
+ copy__177: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None
+ copy__178: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None
+ copy__179: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None
+ copy__180: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None
+ copy__181: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None
+ copy__182: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None
+ copy__183: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None
+ copy__184: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None
+ copy__185: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None
+ copy__186: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None
+ copy__187: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None
+ copy__188: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None
+ copy__189: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None
+ copy__190: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None
+ copy__191: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None
+ copy__192: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None
+ copy__193: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None
+ copy__194: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None
+ copy__195: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None
+ copy__196: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None
+ copy__197: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None
+ copy__198: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None
+ copy__199: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None
+ copy__200: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None
+ copy__201: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None
+ copy__202: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None
+ copy__203: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None
+ copy__204: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None
+ copy__205: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None
+ copy__206: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None
+ copy__207: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None
+ copy__208: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None
+ copy__209: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None
+ copy__210: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None
+ copy__211: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None
+ copy__212: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None
+ copy__213: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None
+ copy__214: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None
+ copy__215: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None
+ copy__216: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None
+ copy__217: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None
+ copy__218: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None
+ copy__219: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None
+ copy__220: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None
+ copy__221: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None
+ copy__222: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None
+ copy__223: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None
+ copy__224: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None
+ copy__225: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None
+ copy__226: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None
+ copy__227: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None
+ copy__228: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None
+ copy__229: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None
+ copy__230: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None
+ copy__231: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None
+ copy__232: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None
+ copy__233: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None
+ copy__234: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None
+ copy__235: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None
+ copy__236: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None
+ copy__237: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None
+ copy__238: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None
+ copy__239: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None
+ copy__240: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None
+ copy__241: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None
+ copy__242: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None
+ copy__243: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None
+ copy__244: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None
+ copy__245: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None
+ copy__246: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None
+ copy__247: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None
+ copy__248: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None
+ copy__249: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None
+ copy__250: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None
+ copy__251: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None
+ copy__252: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None
+ copy__253: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None
+ copy__254: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None
+ copy__255: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None
+ copy__256: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None
+ copy__257: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None
+ copy__258: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None
+ copy__259: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None
+ copy__260: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None
+ copy__261: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None
+ copy__262: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None
+ copy__263: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None
+ copy__264: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None
+ copy__265: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None
+ copy__266: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None
+ copy__267: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None
+ copy__268: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None
+ copy__269: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None
+ copy__270: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None
+ copy__271: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None
+ copy__272: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None
+ copy__273: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None
+ copy__274: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None
+ copy__275: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None
+ copy__276: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None
+ copy__277: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None
+ copy__278: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None
+ copy__279: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None
+ copy__280: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None
+ copy__281: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None
+ copy__282: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None
+ copy__283: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None
+ copy__284: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None
+ copy__285: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None
+ copy__286: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None
+ copy__287: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None
+ copy__288: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None
+ copy__289: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None
+ copy__290: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None
+ copy__291: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None
+ copy__292: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None
+ copy__293: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None
+ copy__294: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None
+ copy__295: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None
+ copy__296: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None
+ copy__297: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_(
+ copy__298: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None
+ copy__299: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None
+ copy__300: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None
+ copy__301: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None
+ copy__302: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None
+ copy__303: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None
+ copy__304: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None
+ copy__305: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None
+ copy__306: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None
+ copy__307: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None
+ copy__308: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None
+ copy__309: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None
+ copy__310: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None
+ copy__311: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None
+ copy__312: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None
+ copy__313: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None
+ copy__314: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None
+ copy__315: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None
+ copy__316: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None
+ copy__317: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None
+ copy__318: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None
+ copy__319: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None
+ copy__320: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None
+ copy__321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None
+ copy__322: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None
+ copy__323: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None
+ copy__324: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None
+ copy__325: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None
+ copy__326: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None
+ copy__327: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None
+ copy__328: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None
+ copy__329: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None
+ copy__330: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None
+ copy__331: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None
+ copy__332: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None
+ copy__333: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None
+ copy__334: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None
+ copy__335: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None
+ copy__336: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None
+ copy__337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None
+ copy__338: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None
+ copy__339: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None
+ copy__340: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None
+ copy__341: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None
+ copy__342: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None
+ copy__343: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None
+ copy__344: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None
+ copy__345: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None
+ copy__346: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None
+ copy__347: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None
+ copy__348: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None
+ copy__349: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None
+ copy__350: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None
+ copy__351: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None
+ copy__352: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None
+ copy__353: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None
+ copy__354: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None
+ copy__355: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None
+ copy__356: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None
+ copy__357: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None
+ copy__358: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None
+ copy__359: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None
+ copy__360: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None
+ copy__361: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None
+ copy__362: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None
+ copy__363: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None
+ copy__364: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None
+ copy__365: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None
+ copy__366: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None
+ copy__367: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None
+ copy__368: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None
+ copy__369: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None
+ copy__370: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None
+ copy__371: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None
+ copy__372: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None
+ copy__373: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None
+ copy__374: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None
+ copy__375: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None
+ copy__376: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None
+ copy__377: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None
+ copy__378: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None
+ copy__379: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None
+ copy__380: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None
+ copy__381: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None
+ copy__382: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None
+ copy__383: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None
+ copy__384: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None
+ copy__385: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None
+ copy__386: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None
+ copy__387: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None
+ copy__388: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None
+ copy__389: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None
+ copy__390: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None
+ copy__391: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None
+ copy__392: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None
+ copy__393: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None
+ copy__394: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None
+ copy__395: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None
+ copy__396: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None
+ copy__397: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None
+ copy__398: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None
+ copy__399: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None
+ copy__400: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None
+ copy__401: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None
+ copy__402: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None
+ copy__403: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None
+ copy__404: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None
+ copy__405: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None
+ copy__406: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None
+ copy__407: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None
+ copy__408: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None
+ copy__409: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None
+ copy__410: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None
+ copy__411: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None
+ copy__412: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None
+ copy__413: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None
+ copy__414: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None
+ copy__415: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None
+ copy__416: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None
+ copy__417: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None
+ copy__418: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None
+ copy__419: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None
+ copy__420: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None
+ copy__421: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None
+ copy__422: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None
+ copy__423: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None
+ copy__424: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None
+ copy__425: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None
+ copy__426: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None
+ copy__427: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None
+ copy__428: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None
+ copy__429: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None
+ copy__430: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None
+ copy__431: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None
+ copy__432: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None
+ copy__433: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None
+ copy__434: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None
+ copy__435: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None
+ copy__436: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None
+ copy__437: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None
+ copy__438: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None
+ copy__439: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None
+ copy__440: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None
+ copy__441: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None
+ copy__442: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None
+ copy__443: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None
+ copy__444: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None
+
+ # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1)
+ copy__445: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None
+ copy__446: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None
+ copy__447: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None
+ copy__448: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None
+ copy__449: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None
+ copy__450: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None
+ copy__451: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None
+ copy__452: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None
+ copy__453: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None
+ copy__454: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None
+ copy__455: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None
+ copy__456: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None
+ copy__457: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None
+ copy__458: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None
+ copy__459: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None
+ copy__460: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None
+ copy__461: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None
+ copy__462: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None
+ copy__463: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None
+ copy__464: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None
+ copy__465: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None
+ copy__466: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None
+ copy__467: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None
+ copy__468: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None
+ copy__469: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None
+ copy__470: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None
+ copy__471: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None
+ copy__472: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None
+ copy__473: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None
+ copy__474: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None
+ copy__475: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None
+ copy__476: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None
+ copy__477: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None
+ copy__478: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None
+ copy__479: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None
+ copy__480: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None
+ copy__481: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None
+ copy__482: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None
+ copy__483: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None
+ copy__484: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None
+ copy__485: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None
+ copy__486: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None
+ copy__487: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None
+ copy__488: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None
+ copy__489: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None
+ copy__490: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None
+ copy__491: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None
+ copy__492: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None
+ copy__493: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None
+ copy__494: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None
+ copy__495: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None
+ copy__496: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None
+ copy__497: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None
+ copy__498: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None
+ copy__499: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None
+ copy__500: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg648_1,
getitem_56); arg648_1 = getitem_56 = copy__500 = None + copy__501: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None + copy__502: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None + copy__503: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None + copy__504: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None + copy__505: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None + copy__506: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None + copy__507: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None + copy__508: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None + copy__509: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None + copy__510: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None + copy__511: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None + copy__512: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None + copy__513: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None + copy__514: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None + copy__515: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None + copy__516: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None + copy__517: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None + copy__518: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None + copy__519: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None + copy__520: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None + copy__521: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None + copy__522: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None + copy__523: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None + copy__524: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None + copy__525: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None + copy__526: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None + copy__527: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None + copy__528: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None + copy__529: "f32[][]cuda:0" 
= torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None + copy__530: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None + copy__531: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None + copy__532: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None + copy__533: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None + copy__534: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None + copy__535: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None + copy__536: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None + copy__537: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None + copy__538: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None + copy__539: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None + copy__540: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None + copy__541: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None + copy__542: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None + copy__543: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None + copy__544: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None + copy__545: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None + copy__546: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None + copy__547: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None + copy__548: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None + copy__549: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None + copy__550: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None + copy__551: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None + copy__552: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None + copy__553: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None + copy__554: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None + copy__555: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None + copy__556: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None + copy__557: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg705_1, getitem_113); 
arg705_1 = getitem_113 = copy__557 = None + copy__558: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None + copy__559: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None + copy__560: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None + copy__561: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None + copy__562: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None + copy__563: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None + copy__564: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None + copy__565: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None + copy__566: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None + copy__567: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None + copy__568: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None + copy__569: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None + copy__570: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None + copy__571: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None + copy__572: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None + copy__573: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None + copy__574: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None + copy__575: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None + copy__576: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None + copy__577: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None + copy__578: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None + copy__579: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None + copy__580: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None + copy__581: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None + copy__582: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None + copy__583: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None + copy__584: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None + copy__585: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = 
copy__585 = None + copy__586: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None + copy__587: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None + copy__588: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None + copy__589: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None + copy__590: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None + copy__591: "f32[][]cuda:0" = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None + return () + +V0806 13:56:10.005000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "da6c5df15068ccb1dd8f153f4756d68b"} + { + "name": "compile_fx.<locals>.fw_compiler_base", + "ts": 1722977770005635.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.110000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9b1896e9c51f43a22f7ad4255b477e14"} + { + "name": "compile_fx_inner", + "ts": 1722977770110168.0, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.110000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "81061d19d2bd08c1aaf530a4a0981740"} + { + "name": "inductor_compile", + "ts": 1722977770110407.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:10.815000 4107173 torch/_inductor/compile_fx.py:719] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "0d4da5461779497f73f310d8bf96bf31"} + + import torch + from torch import tensor, device + import torch.fx as fx + from torch._dynamo.testing import rand_strided + from math import inf + import torch._inductor.inductor_prims + + import torch._dynamo.config + import torch._inductor.config + import torch._functorch.config + import torch.fx.experimental._config + + torch._inductor.config.triton.cudagraphs = True + torch._functorch.config.unlift_effect_tokens = True + + + + isolate_fails_code_str = None + + + + # torch version: 2.5.0a0+git6fbc72b + # torch cuda version: 12.0 + # torch git version: 6fbc72b6d764eaeb9ef896840c7996ca2a35188d + + + # CUDA Info: + # nvcc: NVIDIA (R) Cuda compiler driver + # Copyright (c) 2005-2023 NVIDIA Corporation + # Built on Fri_Jan__6_16:45:21_PST_2023 + # Cuda compilation tools, release 12.0, V12.0.140 + # Build cuda_12.0.r12.0/compiler.32267302_0 + + # GPU Hardware Info: + # NVIDIA H100 : 1 + + + from torch.nn import * + class Repro(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + + + def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1,
arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, 
arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1): + _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, 
arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1) + getitem = _foreach_add[0] + getitem_1 = _foreach_add[1] + getitem_2 = _foreach_add[2] + getitem_3 = _foreach_add[3] + getitem_4 = _foreach_add[4] + getitem_5 = _foreach_add[5] + getitem_6 = _foreach_add[6] + getitem_7 = _foreach_add[7] + getitem_8 = _foreach_add[8] + getitem_9 = _foreach_add[9] + getitem_10 = _foreach_add[10] + getitem_11 = _foreach_add[11] + getitem_12 = _foreach_add[12] + getitem_13 = _foreach_add[13] + getitem_14 = _foreach_add[14] + getitem_15 = _foreach_add[15] + getitem_16 = _foreach_add[16] + getitem_17 = _foreach_add[17] + getitem_18 = _foreach_add[18] + getitem_19 = _foreach_add[19] + getitem_20 = _foreach_add[20] + getitem_21 = _foreach_add[21] + getitem_22 = _foreach_add[22] + getitem_23 = _foreach_add[23] + getitem_24 = _foreach_add[24] + getitem_25 = _foreach_add[25] + getitem_26 = _foreach_add[26] + getitem_27 = _foreach_add[27] + getitem_28 = _foreach_add[28] + getitem_29 = _foreach_add[29] + getitem_30 = _foreach_add[30] + getitem_31 = _foreach_add[31] + getitem_32 = _foreach_add[32] + getitem_33 = _foreach_add[33] + getitem_34 = _foreach_add[34] + getitem_35 = _foreach_add[35] + getitem_36 = _foreach_add[36] + getitem_37 = _foreach_add[37] + getitem_38 = _foreach_add[38] + getitem_39 = _foreach_add[39] + getitem_40 = _foreach_add[40] + getitem_41 = _foreach_add[41] + getitem_42 = _foreach_add[42] + getitem_43 = _foreach_add[43] + getitem_44 = _foreach_add[44] + getitem_45 = _foreach_add[45] + getitem_46 = _foreach_add[46] + getitem_47 = _foreach_add[47] + getitem_48 = _foreach_add[48] + getitem_49 = _foreach_add[49] + getitem_50 = _foreach_add[50] + getitem_51 = _foreach_add[51] + getitem_52 = _foreach_add[52] + getitem_53 = _foreach_add[53] + getitem_54 = _foreach_add[54] + getitem_55 = _foreach_add[55] + getitem_56 = _foreach_add[56] + getitem_57 = _foreach_add[57] + getitem_58 = _foreach_add[58] + getitem_59 = _foreach_add[59] + getitem_60 = _foreach_add[60] + getitem_61 = _foreach_add[61] + getitem_62 = _foreach_add[62] + getitem_63 = _foreach_add[63] + getitem_64 = _foreach_add[64] + getitem_65 = _foreach_add[65] + getitem_66 = _foreach_add[66] + getitem_67 = _foreach_add[67] + getitem_68 = _foreach_add[68] + getitem_69 = _foreach_add[69] + getitem_70 = _foreach_add[70] + getitem_71 = _foreach_add[71] + getitem_72 = _foreach_add[72] + getitem_73 = _foreach_add[73] + getitem_74 = _foreach_add[74] + getitem_75 = _foreach_add[75] + getitem_76 = _foreach_add[76] + getitem_77 = _foreach_add[77] + getitem_78 = _foreach_add[78] + getitem_79 = _foreach_add[79] + getitem_80 = _foreach_add[80] + getitem_81 = 
_foreach_add[81] + getitem_82 = _foreach_add[82] + getitem_83 = _foreach_add[83] + getitem_84 = _foreach_add[84] + getitem_85 = _foreach_add[85] + getitem_86 = _foreach_add[86] + getitem_87 = _foreach_add[87] + getitem_88 = _foreach_add[88] + getitem_89 = _foreach_add[89] + getitem_90 = _foreach_add[90] + getitem_91 = _foreach_add[91] + getitem_92 = _foreach_add[92] + getitem_93 = _foreach_add[93] + getitem_94 = _foreach_add[94] + getitem_95 = _foreach_add[95] + getitem_96 = _foreach_add[96] + getitem_97 = _foreach_add[97] + getitem_98 = _foreach_add[98] + getitem_99 = _foreach_add[99] + getitem_100 = _foreach_add[100] + getitem_101 = _foreach_add[101] + getitem_102 = _foreach_add[102] + getitem_103 = _foreach_add[103] + getitem_104 = _foreach_add[104] + getitem_105 = _foreach_add[105] + getitem_106 = _foreach_add[106] + getitem_107 = _foreach_add[107] + getitem_108 = _foreach_add[108] + getitem_109 = _foreach_add[109] + getitem_110 = _foreach_add[110] + getitem_111 = _foreach_add[111] + getitem_112 = _foreach_add[112] + getitem_113 = _foreach_add[113] + getitem_114 = _foreach_add[114] + getitem_115 = _foreach_add[115] + getitem_116 = _foreach_add[116] + getitem_117 = _foreach_add[117] + getitem_118 = _foreach_add[118] + getitem_119 = _foreach_add[119] + getitem_120 = _foreach_add[120] + getitem_121 = _foreach_add[121] + getitem_122 = _foreach_add[122] + getitem_123 = _foreach_add[123] + getitem_124 = _foreach_add[124] + getitem_125 = _foreach_add[125] + getitem_126 = _foreach_add[126] + getitem_127 = _foreach_add[127] + getitem_128 = _foreach_add[128] + getitem_129 = _foreach_add[129] + getitem_130 = _foreach_add[130] + getitem_131 = _foreach_add[131] + getitem_132 = _foreach_add[132] + getitem_133 = _foreach_add[133] + getitem_134 = _foreach_add[134] + getitem_135 = _foreach_add[135] + getitem_136 = _foreach_add[136] + getitem_137 = _foreach_add[137] + getitem_138 = _foreach_add[138] + getitem_139 = _foreach_add[139] + getitem_140 = _foreach_add[140] + getitem_141 = _foreach_add[141] + getitem_142 = _foreach_add[142] + getitem_143 = _foreach_add[143] + getitem_144 = _foreach_add[144] + getitem_145 = _foreach_add[145] + getitem_146 = _foreach_add[146] + getitem_147 = _foreach_add[147]; _foreach_add = None + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, 
arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148 = _foreach_sub[0] + getitem_149 = _foreach_sub[1] + getitem_150 = _foreach_sub[2] + getitem_151 = _foreach_sub[3] + getitem_152 = _foreach_sub[4] + getitem_153 = _foreach_sub[5] + getitem_154 = _foreach_sub[6] + getitem_155 = _foreach_sub[7] + getitem_156 = _foreach_sub[8] + getitem_157 = _foreach_sub[9] + getitem_158 = _foreach_sub[10] + getitem_159 = _foreach_sub[11] + getitem_160 = _foreach_sub[12] + getitem_161 = _foreach_sub[13] + getitem_162 = _foreach_sub[14] + getitem_163 = _foreach_sub[15] + getitem_164 = _foreach_sub[16] + getitem_165 = _foreach_sub[17] + getitem_166 = _foreach_sub[18] + getitem_167 = _foreach_sub[19] + getitem_168 = _foreach_sub[20] + getitem_169 = _foreach_sub[21] + getitem_170 = _foreach_sub[22] + getitem_171 = _foreach_sub[23] + getitem_172 = _foreach_sub[24] + getitem_173 = _foreach_sub[25] + getitem_174 = _foreach_sub[26] + getitem_175 = _foreach_sub[27] + getitem_176 = _foreach_sub[28] + getitem_177 = _foreach_sub[29] + getitem_178 = _foreach_sub[30] + getitem_179 = _foreach_sub[31] + getitem_180 = _foreach_sub[32] + getitem_181 = _foreach_sub[33] + getitem_182 = _foreach_sub[34] + getitem_183 = _foreach_sub[35] + getitem_184 = _foreach_sub[36] + getitem_185 = _foreach_sub[37] + getitem_186 = _foreach_sub[38] + getitem_187 = _foreach_sub[39] + getitem_188 = _foreach_sub[40] + getitem_189 = _foreach_sub[41] + getitem_190 = _foreach_sub[42] + getitem_191 = _foreach_sub[43] + getitem_192 = _foreach_sub[44] + getitem_193 = _foreach_sub[45] + getitem_194 = _foreach_sub[46] + getitem_195 = _foreach_sub[47] + getitem_196 = _foreach_sub[48] + getitem_197 = _foreach_sub[49] + getitem_198 = _foreach_sub[50] + getitem_199 = _foreach_sub[51] + getitem_200 = _foreach_sub[52] + getitem_201 = _foreach_sub[53] + getitem_202 = _foreach_sub[54] + getitem_203 = _foreach_sub[55] + 
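# NOTE (reader annotation, not captured log output; assumes Adam's default beta1 = 0.9): the _foreach_sub above computes grad - exp_avg for each of the 148 parameters, + # the _foreach_mul below scales that difference by 0.09999999999999998 (1 - beta1 in float64), and the _foreach_add.List after it adds the result back into exp_avg, + # which appears to be the functionalized decomposition of exp_avg.lerp_(grad, 1 - beta1) from _multi_tensor_adam: exp_avg = exp_avg + (grad - exp_avg) * (1 - beta1). +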
getitem_204 = _foreach_sub[56] + getitem_205 = _foreach_sub[57] + getitem_206 = _foreach_sub[58] + getitem_207 = _foreach_sub[59] + getitem_208 = _foreach_sub[60] + getitem_209 = _foreach_sub[61] + getitem_210 = _foreach_sub[62] + getitem_211 = _foreach_sub[63] + getitem_212 = _foreach_sub[64] + getitem_213 = _foreach_sub[65] + getitem_214 = _foreach_sub[66] + getitem_215 = _foreach_sub[67] + getitem_216 = _foreach_sub[68] + getitem_217 = _foreach_sub[69] + getitem_218 = _foreach_sub[70] + getitem_219 = _foreach_sub[71] + getitem_220 = _foreach_sub[72] + getitem_221 = _foreach_sub[73] + getitem_222 = _foreach_sub[74] + getitem_223 = _foreach_sub[75] + getitem_224 = _foreach_sub[76] + getitem_225 = _foreach_sub[77] + getitem_226 = _foreach_sub[78] + getitem_227 = _foreach_sub[79] + getitem_228 = _foreach_sub[80] + getitem_229 = _foreach_sub[81] + getitem_230 = _foreach_sub[82] + getitem_231 = _foreach_sub[83] + getitem_232 = _foreach_sub[84] + getitem_233 = _foreach_sub[85] + getitem_234 = _foreach_sub[86] + getitem_235 = _foreach_sub[87] + getitem_236 = _foreach_sub[88] + getitem_237 = _foreach_sub[89] + getitem_238 = _foreach_sub[90] + getitem_239 = _foreach_sub[91] + getitem_240 = _foreach_sub[92] + getitem_241 = _foreach_sub[93] + getitem_242 = _foreach_sub[94] + getitem_243 = _foreach_sub[95] + getitem_244 = _foreach_sub[96] + getitem_245 = _foreach_sub[97] + getitem_246 = _foreach_sub[98] + getitem_247 = _foreach_sub[99] + getitem_248 = _foreach_sub[100] + getitem_249 = _foreach_sub[101] + getitem_250 = _foreach_sub[102] + getitem_251 = _foreach_sub[103] + getitem_252 = _foreach_sub[104] + getitem_253 = _foreach_sub[105] + getitem_254 = _foreach_sub[106] + getitem_255 = _foreach_sub[107] + getitem_256 = _foreach_sub[108] + getitem_257 = _foreach_sub[109] + getitem_258 = _foreach_sub[110] + getitem_259 = _foreach_sub[111] + getitem_260 = _foreach_sub[112] + getitem_261 = _foreach_sub[113] + getitem_262 = _foreach_sub[114] + getitem_263 = _foreach_sub[115] + getitem_264 = _foreach_sub[116] + getitem_265 = _foreach_sub[117] + getitem_266 = _foreach_sub[118] + getitem_267 = _foreach_sub[119] + getitem_268 = _foreach_sub[120] + getitem_269 = _foreach_sub[121] + getitem_270 = _foreach_sub[122] + getitem_271 = _foreach_sub[123] + getitem_272 = _foreach_sub[124] + getitem_273 = _foreach_sub[125] + getitem_274 = _foreach_sub[126] + getitem_275 = _foreach_sub[127] + getitem_276 = _foreach_sub[128] + getitem_277 = _foreach_sub[129] + getitem_278 = _foreach_sub[130] + getitem_279 = _foreach_sub[131] + getitem_280 = _foreach_sub[132] + getitem_281 = _foreach_sub[133] + getitem_282 = _foreach_sub[134] + getitem_283 = _foreach_sub[135] + getitem_284 = _foreach_sub[136] + getitem_285 = _foreach_sub[137] + getitem_286 = _foreach_sub[138] + getitem_287 = _foreach_sub[139] + getitem_288 = _foreach_sub[140] + getitem_289 = _foreach_sub[141] + getitem_290 = _foreach_sub[142] + getitem_291 = _foreach_sub[143] + getitem_292 = _foreach_sub[144] + getitem_293 = _foreach_sub[145] + getitem_294 = _foreach_sub[146] + getitem_295 = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, 
getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = 
getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296 = _foreach_mul[0] + getitem_297 = _foreach_mul[1] + getitem_298 = _foreach_mul[2] + getitem_299 = _foreach_mul[3] + getitem_300 = _foreach_mul[4] + getitem_301 = _foreach_mul[5] + getitem_302 = _foreach_mul[6] + getitem_303 = _foreach_mul[7] + getitem_304 = _foreach_mul[8] + getitem_305 = _foreach_mul[9] + getitem_306 = _foreach_mul[10] + getitem_307 = _foreach_mul[11] + getitem_308 = _foreach_mul[12] + getitem_309 = _foreach_mul[13] + getitem_310 = _foreach_mul[14] + getitem_311 = _foreach_mul[15] + getitem_312 = _foreach_mul[16] + getitem_313 = _foreach_mul[17] + getitem_314 = _foreach_mul[18] + getitem_315 = _foreach_mul[19] + getitem_316 = _foreach_mul[20] + getitem_317 = _foreach_mul[21] + getitem_318 = _foreach_mul[22] + getitem_319 = _foreach_mul[23] + getitem_320 = _foreach_mul[24] + getitem_321 = _foreach_mul[25] + getitem_322 = _foreach_mul[26] + getitem_323 = _foreach_mul[27] + getitem_324 = _foreach_mul[28] + getitem_325 = _foreach_mul[29] + getitem_326 = _foreach_mul[30] + getitem_327 = _foreach_mul[31] + getitem_328 = _foreach_mul[32] + getitem_329 = _foreach_mul[33] + getitem_330 = _foreach_mul[34] + getitem_331 = _foreach_mul[35] + getitem_332 = _foreach_mul[36] + getitem_333 = _foreach_mul[37] + getitem_334 = _foreach_mul[38] + getitem_335 = _foreach_mul[39] + getitem_336 = _foreach_mul[40] + getitem_337 = _foreach_mul[41] + getitem_338 = _foreach_mul[42] + getitem_339 = _foreach_mul[43] + getitem_340 = _foreach_mul[44] + getitem_341 = _foreach_mul[45] + getitem_342 = _foreach_mul[46] + getitem_343 = _foreach_mul[47] + getitem_344 = _foreach_mul[48] + getitem_345 = _foreach_mul[49] + getitem_346 = _foreach_mul[50] + getitem_347 = _foreach_mul[51] + getitem_348 = _foreach_mul[52] + getitem_349 = _foreach_mul[53] + getitem_350 = _foreach_mul[54] + getitem_351 = _foreach_mul[55] + getitem_352 = _foreach_mul[56] + getitem_353 = _foreach_mul[57] + getitem_354 = _foreach_mul[58] + getitem_355 = _foreach_mul[59] + getitem_356 = _foreach_mul[60] + getitem_357 = _foreach_mul[61] + getitem_358 = _foreach_mul[62] + getitem_359 = _foreach_mul[63] + getitem_360 = _foreach_mul[64] + getitem_361 = _foreach_mul[65] + getitem_362 = _foreach_mul[66] + getitem_363 = _foreach_mul[67] + getitem_364 = _foreach_mul[68] + getitem_365 = _foreach_mul[69] + getitem_366 = _foreach_mul[70] + getitem_367 = _foreach_mul[71] + getitem_368 = _foreach_mul[72] + getitem_369 = _foreach_mul[73] + getitem_370 = _foreach_mul[74] + getitem_371 = _foreach_mul[75] + getitem_372 = _foreach_mul[76] + getitem_373 = _foreach_mul[77] + getitem_374 = _foreach_mul[78] + getitem_375 = _foreach_mul[79] + getitem_376 = _foreach_mul[80] + getitem_377 = _foreach_mul[81] + getitem_378 = _foreach_mul[82] + getitem_379 = _foreach_mul[83] + getitem_380 = _foreach_mul[84] + getitem_381 = _foreach_mul[85] + getitem_382 = _foreach_mul[86] + getitem_383 = _foreach_mul[87] + getitem_384 = _foreach_mul[88] + getitem_385 = _foreach_mul[89] + getitem_386 = _foreach_mul[90] + getitem_387 = _foreach_mul[91] + getitem_388 = _foreach_mul[92] + getitem_389 = _foreach_mul[93] + getitem_390 = _foreach_mul[94] + getitem_391 = _foreach_mul[95] + getitem_392 = _foreach_mul[96] + getitem_393 = _foreach_mul[97] + getitem_394 = _foreach_mul[98] + getitem_395 = _foreach_mul[99] + getitem_396 = _foreach_mul[100] + getitem_397 = _foreach_mul[101] + getitem_398 = _foreach_mul[102] + getitem_399 = _foreach_mul[103] + getitem_400 = _foreach_mul[104] + 
getitem_401 = _foreach_mul[105] + getitem_402 = _foreach_mul[106] + getitem_403 = _foreach_mul[107] + getitem_404 = _foreach_mul[108] + getitem_405 = _foreach_mul[109] + getitem_406 = _foreach_mul[110] + getitem_407 = _foreach_mul[111] + getitem_408 = _foreach_mul[112] + getitem_409 = _foreach_mul[113] + getitem_410 = _foreach_mul[114] + getitem_411 = _foreach_mul[115] + getitem_412 = _foreach_mul[116] + getitem_413 = _foreach_mul[117] + getitem_414 = _foreach_mul[118] + getitem_415 = _foreach_mul[119] + getitem_416 = _foreach_mul[120] + getitem_417 = _foreach_mul[121] + getitem_418 = _foreach_mul[122] + getitem_419 = _foreach_mul[123] + getitem_420 = _foreach_mul[124] + getitem_421 = _foreach_mul[125] + getitem_422 = _foreach_mul[126] + getitem_423 = _foreach_mul[127] + getitem_424 = _foreach_mul[128] + getitem_425 = _foreach_mul[129] + getitem_426 = _foreach_mul[130] + getitem_427 = _foreach_mul[131] + getitem_428 = _foreach_mul[132] + getitem_429 = _foreach_mul[133] + getitem_430 = _foreach_mul[134] + getitem_431 = _foreach_mul[135] + getitem_432 = _foreach_mul[136] + getitem_433 = _foreach_mul[137] + getitem_434 = _foreach_mul[138] + getitem_435 = _foreach_mul[139] + getitem_436 = _foreach_mul[140] + getitem_437 = _foreach_mul[141] + getitem_438 = _foreach_mul[142] + getitem_439 = _foreach_mul[143] + getitem_440 = _foreach_mul[144] + getitem_441 = _foreach_mul[145] + getitem_442 = _foreach_mul[146] + getitem_443 = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, 
getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444 = _foreach_add_1[0] + getitem_445 = _foreach_add_1[1] + getitem_446 = 
_foreach_add_1[2] + getitem_447 = _foreach_add_1[3] + getitem_448 = _foreach_add_1[4] + getitem_449 = _foreach_add_1[5] + getitem_450 = _foreach_add_1[6] + getitem_451 = _foreach_add_1[7] + getitem_452 = _foreach_add_1[8] + getitem_453 = _foreach_add_1[9] + getitem_454 = _foreach_add_1[10] + getitem_455 = _foreach_add_1[11] + getitem_456 = _foreach_add_1[12] + getitem_457 = _foreach_add_1[13] + getitem_458 = _foreach_add_1[14] + getitem_459 = _foreach_add_1[15] + getitem_460 = _foreach_add_1[16] + getitem_461 = _foreach_add_1[17] + getitem_462 = _foreach_add_1[18] + getitem_463 = _foreach_add_1[19] + getitem_464 = _foreach_add_1[20] + getitem_465 = _foreach_add_1[21] + getitem_466 = _foreach_add_1[22] + getitem_467 = _foreach_add_1[23] + getitem_468 = _foreach_add_1[24] + getitem_469 = _foreach_add_1[25] + getitem_470 = _foreach_add_1[26] + getitem_471 = _foreach_add_1[27] + getitem_472 = _foreach_add_1[28] + getitem_473 = _foreach_add_1[29] + getitem_474 = _foreach_add_1[30] + getitem_475 = _foreach_add_1[31] + getitem_476 = _foreach_add_1[32] + getitem_477 = _foreach_add_1[33] + getitem_478 = _foreach_add_1[34] + getitem_479 = _foreach_add_1[35] + getitem_480 = _foreach_add_1[36] + getitem_481 = _foreach_add_1[37] + getitem_482 = _foreach_add_1[38] + getitem_483 = _foreach_add_1[39] + getitem_484 = _foreach_add_1[40] + getitem_485 = _foreach_add_1[41] + getitem_486 = _foreach_add_1[42] + getitem_487 = _foreach_add_1[43] + getitem_488 = _foreach_add_1[44] + getitem_489 = _foreach_add_1[45] + getitem_490 = _foreach_add_1[46] + getitem_491 = _foreach_add_1[47] + getitem_492 = _foreach_add_1[48] + getitem_493 = _foreach_add_1[49] + getitem_494 = _foreach_add_1[50] + getitem_495 = _foreach_add_1[51] + getitem_496 = _foreach_add_1[52] + getitem_497 = _foreach_add_1[53] + getitem_498 = _foreach_add_1[54] + getitem_499 = _foreach_add_1[55] + getitem_500 = _foreach_add_1[56] + getitem_501 = _foreach_add_1[57] + getitem_502 = _foreach_add_1[58] + getitem_503 = _foreach_add_1[59] + getitem_504 = _foreach_add_1[60] + getitem_505 = _foreach_add_1[61] + getitem_506 = _foreach_add_1[62] + getitem_507 = _foreach_add_1[63] + getitem_508 = _foreach_add_1[64] + getitem_509 = _foreach_add_1[65] + getitem_510 = _foreach_add_1[66] + getitem_511 = _foreach_add_1[67] + getitem_512 = _foreach_add_1[68] + getitem_513 = _foreach_add_1[69] + getitem_514 = _foreach_add_1[70] + getitem_515 = _foreach_add_1[71] + getitem_516 = _foreach_add_1[72] + getitem_517 = _foreach_add_1[73] + getitem_518 = _foreach_add_1[74] + getitem_519 = _foreach_add_1[75] + getitem_520 = _foreach_add_1[76] + getitem_521 = _foreach_add_1[77] + getitem_522 = _foreach_add_1[78] + getitem_523 = _foreach_add_1[79] + getitem_524 = _foreach_add_1[80] + getitem_525 = _foreach_add_1[81] + getitem_526 = _foreach_add_1[82] + getitem_527 = _foreach_add_1[83] + getitem_528 = _foreach_add_1[84] + getitem_529 = _foreach_add_1[85] + getitem_530 = _foreach_add_1[86] + getitem_531 = _foreach_add_1[87] + getitem_532 = _foreach_add_1[88] + getitem_533 = _foreach_add_1[89] + getitem_534 = _foreach_add_1[90] + getitem_535 = _foreach_add_1[91] + getitem_536 = _foreach_add_1[92] + getitem_537 = _foreach_add_1[93] + getitem_538 = _foreach_add_1[94] + getitem_539 = _foreach_add_1[95] + getitem_540 = _foreach_add_1[96] + getitem_541 = _foreach_add_1[97] + getitem_542 = _foreach_add_1[98] + getitem_543 = _foreach_add_1[99] + getitem_544 = _foreach_add_1[100] + getitem_545 = _foreach_add_1[101] + getitem_546 = _foreach_add_1[102] + getitem_547 = _foreach_add_1[103] + 
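# NOTE (reader annotation, not captured log output; assumes Adam's default beta2 = 0.999): the updated exp_avg tensors unpacked here are + # written back into the optimizer state by the copy_ ops at the end of the graph, while the _foreach_mul by 0.999 below scales the + # exp_avg_sq state by beta2, the first half of exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * grad * grad + # (the addcmul half presumably appears further down in the graph). +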
getitem_548 = _foreach_add_1[104] + getitem_549 = _foreach_add_1[105] + getitem_550 = _foreach_add_1[106] + getitem_551 = _foreach_add_1[107] + getitem_552 = _foreach_add_1[108] + getitem_553 = _foreach_add_1[109] + getitem_554 = _foreach_add_1[110] + getitem_555 = _foreach_add_1[111] + getitem_556 = _foreach_add_1[112] + getitem_557 = _foreach_add_1[113] + getitem_558 = _foreach_add_1[114] + getitem_559 = _foreach_add_1[115] + getitem_560 = _foreach_add_1[116] + getitem_561 = _foreach_add_1[117] + getitem_562 = _foreach_add_1[118] + getitem_563 = _foreach_add_1[119] + getitem_564 = _foreach_add_1[120] + getitem_565 = _foreach_add_1[121] + getitem_566 = _foreach_add_1[122] + getitem_567 = _foreach_add_1[123] + getitem_568 = _foreach_add_1[124] + getitem_569 = _foreach_add_1[125] + getitem_570 = _foreach_add_1[126] + getitem_571 = _foreach_add_1[127] + getitem_572 = _foreach_add_1[128] + getitem_573 = _foreach_add_1[129] + getitem_574 = _foreach_add_1[130] + getitem_575 = _foreach_add_1[131] + getitem_576 = _foreach_add_1[132] + getitem_577 = _foreach_add_1[133] + getitem_578 = _foreach_add_1[134] + getitem_579 = _foreach_add_1[135] + getitem_580 = _foreach_add_1[136] + getitem_581 = _foreach_add_1[137] + getitem_582 = _foreach_add_1[138] + getitem_583 = _foreach_add_1[139] + getitem_584 = _foreach_add_1[140] + getitem_585 = _foreach_add_1[141] + getitem_586 = _foreach_add_1[142] + getitem_587 = _foreach_add_1[143] + getitem_588 = _foreach_add_1[144] + getitem_589 = _foreach_add_1[145] + getitem_590 = _foreach_add_1[146] + getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None + _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592 = _foreach_mul_1[0] + getitem_593 = _foreach_mul_1[1] + getitem_594 = _foreach_mul_1[2] + getitem_595 = _foreach_mul_1[3] + getitem_596 = _foreach_mul_1[4] + getitem_597 = _foreach_mul_1[5] + getitem_598 = _foreach_mul_1[6] + getitem_599 = _foreach_mul_1[7] + getitem_600 = _foreach_mul_1[8] + getitem_601 = _foreach_mul_1[9] + getitem_602 = _foreach_mul_1[10] + getitem_603 = 
_foreach_mul_1[11] + getitem_604 = _foreach_mul_1[12] + getitem_605 = _foreach_mul_1[13] + getitem_606 = _foreach_mul_1[14] + getitem_607 = _foreach_mul_1[15] + getitem_608 = _foreach_mul_1[16] + getitem_609 = _foreach_mul_1[17] + getitem_610 = _foreach_mul_1[18] + getitem_611 = _foreach_mul_1[19] + getitem_612 = _foreach_mul_1[20] + getitem_613 = _foreach_mul_1[21] + getitem_614 = _foreach_mul_1[22] + getitem_615 = _foreach_mul_1[23] + getitem_616 = _foreach_mul_1[24] + getitem_617 = _foreach_mul_1[25] + getitem_618 = _foreach_mul_1[26] + getitem_619 = _foreach_mul_1[27] + getitem_620 = _foreach_mul_1[28] + getitem_621 = _foreach_mul_1[29] + getitem_622 = _foreach_mul_1[30] + getitem_623 = _foreach_mul_1[31] + getitem_624 = _foreach_mul_1[32] + getitem_625 = _foreach_mul_1[33] + getitem_626 = _foreach_mul_1[34] + getitem_627 = _foreach_mul_1[35] + getitem_628 = _foreach_mul_1[36] + getitem_629 = _foreach_mul_1[37] + getitem_630 = _foreach_mul_1[38] + getitem_631 = _foreach_mul_1[39] + getitem_632 = _foreach_mul_1[40] + getitem_633 = _foreach_mul_1[41] + getitem_634 = _foreach_mul_1[42] + getitem_635 = _foreach_mul_1[43] + getitem_636 = _foreach_mul_1[44] + getitem_637 = _foreach_mul_1[45] + getitem_638 = _foreach_mul_1[46] + getitem_639 = _foreach_mul_1[47] + getitem_640 = _foreach_mul_1[48] + getitem_641 = _foreach_mul_1[49] + getitem_642 = _foreach_mul_1[50] + getitem_643 = _foreach_mul_1[51] + getitem_644 = _foreach_mul_1[52] + getitem_645 = _foreach_mul_1[53] + getitem_646 = _foreach_mul_1[54] + getitem_647 = _foreach_mul_1[55] + getitem_648 = _foreach_mul_1[56] + getitem_649 = _foreach_mul_1[57] + getitem_650 = _foreach_mul_1[58] + getitem_651 = _foreach_mul_1[59] + getitem_652 = _foreach_mul_1[60] + getitem_653 = _foreach_mul_1[61] + getitem_654 = _foreach_mul_1[62] + getitem_655 = _foreach_mul_1[63] + getitem_656 = _foreach_mul_1[64] + getitem_657 = _foreach_mul_1[65] + getitem_658 = _foreach_mul_1[66] + getitem_659 = _foreach_mul_1[67] + getitem_660 = _foreach_mul_1[68] + getitem_661 = _foreach_mul_1[69] + getitem_662 = _foreach_mul_1[70] + getitem_663 = _foreach_mul_1[71] + getitem_664 = _foreach_mul_1[72] + getitem_665 = _foreach_mul_1[73] + getitem_666 = _foreach_mul_1[74] + getitem_667 = _foreach_mul_1[75] + getitem_668 = _foreach_mul_1[76] + getitem_669 = _foreach_mul_1[77] + getitem_670 = _foreach_mul_1[78] + getitem_671 = _foreach_mul_1[79] + getitem_672 = _foreach_mul_1[80] + getitem_673 = _foreach_mul_1[81] + getitem_674 = _foreach_mul_1[82] + getitem_675 = _foreach_mul_1[83] + getitem_676 = _foreach_mul_1[84] + getitem_677 = _foreach_mul_1[85] + getitem_678 = _foreach_mul_1[86] + getitem_679 = _foreach_mul_1[87] + getitem_680 = _foreach_mul_1[88] + getitem_681 = _foreach_mul_1[89] + getitem_682 = _foreach_mul_1[90] + getitem_683 = _foreach_mul_1[91] + getitem_684 = _foreach_mul_1[92] + getitem_685 = _foreach_mul_1[93] + getitem_686 = _foreach_mul_1[94] + getitem_687 = _foreach_mul_1[95] + getitem_688 = _foreach_mul_1[96] + getitem_689 = _foreach_mul_1[97] + getitem_690 = _foreach_mul_1[98] + getitem_691 = _foreach_mul_1[99] + getitem_692 = _foreach_mul_1[100] + getitem_693 = _foreach_mul_1[101] + getitem_694 = _foreach_mul_1[102] + getitem_695 = _foreach_mul_1[103] + getitem_696 = _foreach_mul_1[104] + getitem_697 = _foreach_mul_1[105] + getitem_698 = _foreach_mul_1[106] + getitem_699 = _foreach_mul_1[107] + getitem_700 = _foreach_mul_1[108] + getitem_701 = _foreach_mul_1[109] + getitem_702 = _foreach_mul_1[110] + getitem_703 = _foreach_mul_1[111] + getitem_704 = 
_foreach_mul_1[112] + getitem_705 = _foreach_mul_1[113] + getitem_706 = _foreach_mul_1[114] + getitem_707 = _foreach_mul_1[115] + getitem_708 = _foreach_mul_1[116] + getitem_709 = _foreach_mul_1[117] + getitem_710 = _foreach_mul_1[118] + getitem_711 = _foreach_mul_1[119] + getitem_712 = _foreach_mul_1[120] + getitem_713 = _foreach_mul_1[121] + getitem_714 = _foreach_mul_1[122] + getitem_715 = _foreach_mul_1[123] + getitem_716 = _foreach_mul_1[124] + getitem_717 = _foreach_mul_1[125] + getitem_718 = _foreach_mul_1[126] + getitem_719 = _foreach_mul_1[127] + getitem_720 = _foreach_mul_1[128] + getitem_721 = _foreach_mul_1[129] + getitem_722 = _foreach_mul_1[130] + getitem_723 = _foreach_mul_1[131] + getitem_724 = _foreach_mul_1[132] + getitem_725 = _foreach_mul_1[133] + getitem_726 = _foreach_mul_1[134] + getitem_727 = _foreach_mul_1[135] + getitem_728 = _foreach_mul_1[136] + getitem_729 = _foreach_mul_1[137] + getitem_730 = _foreach_mul_1[138] + getitem_731 = _foreach_mul_1[139] + getitem_732 = _foreach_mul_1[140] + getitem_733 = _foreach_mul_1[141] + getitem_734 = _foreach_mul_1[142] + getitem_735 = _foreach_mul_1[143] + getitem_736 = _foreach_mul_1[144] + getitem_737 = _foreach_mul_1[145] + getitem_738 = _foreach_mul_1[146] + getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, 
arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740 = _foreach_mul_2[0] + getitem_741 = _foreach_mul_2[1] + getitem_742 = _foreach_mul_2[2] + getitem_743 = _foreach_mul_2[3] + getitem_744 = _foreach_mul_2[4] + getitem_745 = _foreach_mul_2[5] + getitem_746 = _foreach_mul_2[6] + getitem_747 = _foreach_mul_2[7] + getitem_748 = _foreach_mul_2[8] + getitem_749 = _foreach_mul_2[9] + getitem_750 = _foreach_mul_2[10] + getitem_751 = _foreach_mul_2[11] + getitem_752 = _foreach_mul_2[12] + getitem_753 = _foreach_mul_2[13] + getitem_754 = _foreach_mul_2[14] + getitem_755 = _foreach_mul_2[15] + getitem_756 = _foreach_mul_2[16] + getitem_757 = _foreach_mul_2[17] + getitem_758 = _foreach_mul_2[18] + getitem_759 = _foreach_mul_2[19] + getitem_760 = _foreach_mul_2[20] + getitem_761 = _foreach_mul_2[21] + getitem_762 = _foreach_mul_2[22] + getitem_763 = _foreach_mul_2[23] + getitem_764 = _foreach_mul_2[24] + getitem_765 = _foreach_mul_2[25] + getitem_766 = _foreach_mul_2[26] + getitem_767 = _foreach_mul_2[27] + getitem_768 = _foreach_mul_2[28] + getitem_769 = _foreach_mul_2[29] + getitem_770 = _foreach_mul_2[30] + getitem_771 = _foreach_mul_2[31] + getitem_772 = _foreach_mul_2[32] + 
getitem_773 = _foreach_mul_2[33] + getitem_774 = _foreach_mul_2[34] + getitem_775 = _foreach_mul_2[35] + getitem_776 = _foreach_mul_2[36] + getitem_777 = _foreach_mul_2[37] + getitem_778 = _foreach_mul_2[38] + getitem_779 = _foreach_mul_2[39] + getitem_780 = _foreach_mul_2[40] + getitem_781 = _foreach_mul_2[41] + getitem_782 = _foreach_mul_2[42] + getitem_783 = _foreach_mul_2[43] + getitem_784 = _foreach_mul_2[44] + getitem_785 = _foreach_mul_2[45] + getitem_786 = _foreach_mul_2[46] + getitem_787 = _foreach_mul_2[47] + getitem_788 = _foreach_mul_2[48] + getitem_789 = _foreach_mul_2[49] + getitem_790 = _foreach_mul_2[50] + getitem_791 = _foreach_mul_2[51] + getitem_792 = _foreach_mul_2[52] + getitem_793 = _foreach_mul_2[53] + getitem_794 = _foreach_mul_2[54] + getitem_795 = _foreach_mul_2[55] + getitem_796 = _foreach_mul_2[56] + getitem_797 = _foreach_mul_2[57] + getitem_798 = _foreach_mul_2[58] + getitem_799 = _foreach_mul_2[59] + getitem_800 = _foreach_mul_2[60] + getitem_801 = _foreach_mul_2[61] + getitem_802 = _foreach_mul_2[62] + getitem_803 = _foreach_mul_2[63] + getitem_804 = _foreach_mul_2[64] + getitem_805 = _foreach_mul_2[65] + getitem_806 = _foreach_mul_2[66] + getitem_807 = _foreach_mul_2[67] + getitem_808 = _foreach_mul_2[68] + getitem_809 = _foreach_mul_2[69] + getitem_810 = _foreach_mul_2[70] + getitem_811 = _foreach_mul_2[71] + getitem_812 = _foreach_mul_2[72] + getitem_813 = _foreach_mul_2[73] + getitem_814 = _foreach_mul_2[74] + getitem_815 = _foreach_mul_2[75] + getitem_816 = _foreach_mul_2[76] + getitem_817 = _foreach_mul_2[77] + getitem_818 = _foreach_mul_2[78] + getitem_819 = _foreach_mul_2[79] + getitem_820 = _foreach_mul_2[80] + getitem_821 = _foreach_mul_2[81] + getitem_822 = _foreach_mul_2[82] + getitem_823 = _foreach_mul_2[83] + getitem_824 = _foreach_mul_2[84] + getitem_825 = _foreach_mul_2[85] + getitem_826 = _foreach_mul_2[86] + getitem_827 = _foreach_mul_2[87] + getitem_828 = _foreach_mul_2[88] + getitem_829 = _foreach_mul_2[89] + getitem_830 = _foreach_mul_2[90] + getitem_831 = _foreach_mul_2[91] + getitem_832 = _foreach_mul_2[92] + getitem_833 = _foreach_mul_2[93] + getitem_834 = _foreach_mul_2[94] + getitem_835 = _foreach_mul_2[95] + getitem_836 = _foreach_mul_2[96] + getitem_837 = _foreach_mul_2[97] + getitem_838 = _foreach_mul_2[98] + getitem_839 = _foreach_mul_2[99] + getitem_840 = _foreach_mul_2[100] + getitem_841 = _foreach_mul_2[101] + getitem_842 = _foreach_mul_2[102] + getitem_843 = _foreach_mul_2[103] + getitem_844 = _foreach_mul_2[104] + getitem_845 = _foreach_mul_2[105] + getitem_846 = _foreach_mul_2[106] + getitem_847 = _foreach_mul_2[107] + getitem_848 = _foreach_mul_2[108] + getitem_849 = _foreach_mul_2[109] + getitem_850 = _foreach_mul_2[110] + getitem_851 = _foreach_mul_2[111] + getitem_852 = _foreach_mul_2[112] + getitem_853 = _foreach_mul_2[113] + getitem_854 = _foreach_mul_2[114] + getitem_855 = _foreach_mul_2[115] + getitem_856 = _foreach_mul_2[116] + getitem_857 = _foreach_mul_2[117] + getitem_858 = _foreach_mul_2[118] + getitem_859 = _foreach_mul_2[119] + getitem_860 = _foreach_mul_2[120] + getitem_861 = _foreach_mul_2[121] + getitem_862 = _foreach_mul_2[122] + getitem_863 = _foreach_mul_2[123] + getitem_864 = _foreach_mul_2[124] + getitem_865 = _foreach_mul_2[125] + getitem_866 = _foreach_mul_2[126] + getitem_867 = _foreach_mul_2[127] + getitem_868 = _foreach_mul_2[128] + getitem_869 = _foreach_mul_2[129] + getitem_870 = _foreach_mul_2[130] + getitem_871 = _foreach_mul_2[131] + getitem_872 = _foreach_mul_2[132] + getitem_873 = 
_foreach_mul_2[133] + getitem_874 = _foreach_mul_2[134] + getitem_875 = _foreach_mul_2[135] + getitem_876 = _foreach_mul_2[136] + getitem_877 = _foreach_mul_2[137] + getitem_878 = _foreach_mul_2[138] + getitem_879 = _foreach_mul_2[139] + getitem_880 = _foreach_mul_2[140] + getitem_881 = _foreach_mul_2[141] + getitem_882 = _foreach_mul_2[142] + getitem_883 = _foreach_mul_2[143] + getitem_884 = _foreach_mul_2[144] + getitem_885 = _foreach_mul_2[145] + getitem_886 = _foreach_mul_2[146] + getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, 
getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = 
getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888 = _foreach_add_2[0] + getitem_889 = _foreach_add_2[1] + getitem_890 = _foreach_add_2[2] + getitem_891 = _foreach_add_2[3] + getitem_892 = _foreach_add_2[4] + getitem_893 = _foreach_add_2[5] + getitem_894 = _foreach_add_2[6] + getitem_895 = _foreach_add_2[7] + getitem_896 = _foreach_add_2[8] + getitem_897 = _foreach_add_2[9] + getitem_898 = _foreach_add_2[10] + getitem_899 = _foreach_add_2[11] + getitem_900 = _foreach_add_2[12] + getitem_901 = _foreach_add_2[13] + getitem_902 = _foreach_add_2[14] + getitem_903 = _foreach_add_2[15] + getitem_904 = _foreach_add_2[16] + getitem_905 = _foreach_add_2[17] + getitem_906 = _foreach_add_2[18] + getitem_907 = _foreach_add_2[19] + getitem_908 = _foreach_add_2[20] + getitem_909 = _foreach_add_2[21] + getitem_910 = _foreach_add_2[22] + getitem_911 = _foreach_add_2[23] + getitem_912 = _foreach_add_2[24] + getitem_913 = _foreach_add_2[25] + getitem_914 = _foreach_add_2[26] + getitem_915 = _foreach_add_2[27] + getitem_916 = _foreach_add_2[28] + getitem_917 = _foreach_add_2[29] + getitem_918 = _foreach_add_2[30] + getitem_919 = _foreach_add_2[31] + getitem_920 = _foreach_add_2[32] + getitem_921 = _foreach_add_2[33] + getitem_922 = _foreach_add_2[34] + getitem_923 = _foreach_add_2[35] + getitem_924 = _foreach_add_2[36] + getitem_925 = _foreach_add_2[37] + getitem_926 = _foreach_add_2[38] + getitem_927 = _foreach_add_2[39] + getitem_928 = _foreach_add_2[40] + getitem_929 = _foreach_add_2[41] + getitem_930 = _foreach_add_2[42] + getitem_931 = _foreach_add_2[43] + getitem_932 = _foreach_add_2[44] + getitem_933 = _foreach_add_2[45] + getitem_934 = _foreach_add_2[46] + getitem_935 = _foreach_add_2[47] + getitem_936 = _foreach_add_2[48] + getitem_937 = _foreach_add_2[49] + getitem_938 = _foreach_add_2[50] + getitem_939 = _foreach_add_2[51] + getitem_940 = _foreach_add_2[52] + getitem_941 = _foreach_add_2[53] + getitem_942 = _foreach_add_2[54] + getitem_943 = _foreach_add_2[55] + getitem_944 = _foreach_add_2[56] + getitem_945 = _foreach_add_2[57] + 
getitem_946 = _foreach_add_2[58] + getitem_947 = _foreach_add_2[59] + getitem_948 = _foreach_add_2[60] + getitem_949 = _foreach_add_2[61] + getitem_950 = _foreach_add_2[62] + getitem_951 = _foreach_add_2[63] + getitem_952 = _foreach_add_2[64] + getitem_953 = _foreach_add_2[65] + getitem_954 = _foreach_add_2[66] + getitem_955 = _foreach_add_2[67] + getitem_956 = _foreach_add_2[68] + getitem_957 = _foreach_add_2[69] + getitem_958 = _foreach_add_2[70] + getitem_959 = _foreach_add_2[71] + getitem_960 = _foreach_add_2[72] + getitem_961 = _foreach_add_2[73] + getitem_962 = _foreach_add_2[74] + getitem_963 = _foreach_add_2[75] + getitem_964 = _foreach_add_2[76] + getitem_965 = _foreach_add_2[77] + getitem_966 = _foreach_add_2[78] + getitem_967 = _foreach_add_2[79] + getitem_968 = _foreach_add_2[80] + getitem_969 = _foreach_add_2[81] + getitem_970 = _foreach_add_2[82] + getitem_971 = _foreach_add_2[83] + getitem_972 = _foreach_add_2[84] + getitem_973 = _foreach_add_2[85] + getitem_974 = _foreach_add_2[86] + getitem_975 = _foreach_add_2[87] + getitem_976 = _foreach_add_2[88] + getitem_977 = _foreach_add_2[89] + getitem_978 = _foreach_add_2[90] + getitem_979 = _foreach_add_2[91] + getitem_980 = _foreach_add_2[92] + getitem_981 = _foreach_add_2[93] + getitem_982 = _foreach_add_2[94] + getitem_983 = _foreach_add_2[95] + getitem_984 = _foreach_add_2[96] + getitem_985 = _foreach_add_2[97] + getitem_986 = _foreach_add_2[98] + getitem_987 = _foreach_add_2[99] + getitem_988 = _foreach_add_2[100] + getitem_989 = _foreach_add_2[101] + getitem_990 = _foreach_add_2[102] + getitem_991 = _foreach_add_2[103] + getitem_992 = _foreach_add_2[104] + getitem_993 = _foreach_add_2[105] + getitem_994 = _foreach_add_2[106] + getitem_995 = _foreach_add_2[107] + getitem_996 = _foreach_add_2[108] + getitem_997 = _foreach_add_2[109] + getitem_998 = _foreach_add_2[110] + getitem_999 = _foreach_add_2[111] + getitem_1000 = _foreach_add_2[112] + getitem_1001 = _foreach_add_2[113] + getitem_1002 = _foreach_add_2[114] + getitem_1003 = _foreach_add_2[115] + getitem_1004 = _foreach_add_2[116] + getitem_1005 = _foreach_add_2[117] + getitem_1006 = _foreach_add_2[118] + getitem_1007 = _foreach_add_2[119] + getitem_1008 = _foreach_add_2[120] + getitem_1009 = _foreach_add_2[121] + getitem_1010 = _foreach_add_2[122] + getitem_1011 = _foreach_add_2[123] + getitem_1012 = _foreach_add_2[124] + getitem_1013 = _foreach_add_2[125] + getitem_1014 = _foreach_add_2[126] + getitem_1015 = _foreach_add_2[127] + getitem_1016 = _foreach_add_2[128] + getitem_1017 = _foreach_add_2[129] + getitem_1018 = _foreach_add_2[130] + getitem_1019 = _foreach_add_2[131] + getitem_1020 = _foreach_add_2[132] + getitem_1021 = _foreach_add_2[133] + getitem_1022 = _foreach_add_2[134] + getitem_1023 = _foreach_add_2[135] + getitem_1024 = _foreach_add_2[136] + getitem_1025 = _foreach_add_2[137] + getitem_1026 = _foreach_add_2[138] + getitem_1027 = _foreach_add_2[139] + getitem_1028 = _foreach_add_2[140] + getitem_1029 = _foreach_add_2[141] + getitem_1030 = _foreach_add_2[142] + getitem_1031 = _foreach_add_2[143] + getitem_1032 = _foreach_add_2[144] + getitem_1033 = _foreach_add_2[145] + getitem_1034 = _foreach_add_2[146] + getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None + _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, 
getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036 = _foreach_pow[0] + getitem_1037 = _foreach_pow[1] + getitem_1038 = _foreach_pow[2] + getitem_1039 = _foreach_pow[3] + getitem_1040 = _foreach_pow[4] + getitem_1041 = _foreach_pow[5] + getitem_1042 = _foreach_pow[6] + getitem_1043 = _foreach_pow[7] + getitem_1044 = _foreach_pow[8] + getitem_1045 = _foreach_pow[9] + getitem_1046 = _foreach_pow[10] + getitem_1047 = _foreach_pow[11] + getitem_1048 = _foreach_pow[12] + getitem_1049 = _foreach_pow[13] + getitem_1050 = _foreach_pow[14] + getitem_1051 = _foreach_pow[15] + getitem_1052 = _foreach_pow[16] + getitem_1053 = _foreach_pow[17] + getitem_1054 = _foreach_pow[18] + getitem_1055 = _foreach_pow[19] + getitem_1056 = _foreach_pow[20] + getitem_1057 = _foreach_pow[21] + getitem_1058 = _foreach_pow[22] + getitem_1059 = _foreach_pow[23] + getitem_1060 = _foreach_pow[24] + getitem_1061 = _foreach_pow[25] + getitem_1062 = _foreach_pow[26] + getitem_1063 = _foreach_pow[27] + getitem_1064 = _foreach_pow[28] + getitem_1065 = _foreach_pow[29] + getitem_1066 = _foreach_pow[30] + getitem_1067 = _foreach_pow[31] + getitem_1068 = _foreach_pow[32] + getitem_1069 = _foreach_pow[33] + getitem_1070 = _foreach_pow[34] + getitem_1071 = _foreach_pow[35] + getitem_1072 = _foreach_pow[36] + getitem_1073 = _foreach_pow[37] + getitem_1074 = _foreach_pow[38] + getitem_1075 = _foreach_pow[39] + getitem_1076 = _foreach_pow[40] + getitem_1077 = _foreach_pow[41] + getitem_1078 = _foreach_pow[42] + getitem_1079 = _foreach_pow[43] + getitem_1080 = _foreach_pow[44] + getitem_1081 = _foreach_pow[45] + getitem_1082 = _foreach_pow[46] + getitem_1083 = _foreach_pow[47] + getitem_1084 = _foreach_pow[48] + getitem_1085 = _foreach_pow[49] + getitem_1086 = _foreach_pow[50] + getitem_1087 = _foreach_pow[51] + getitem_1088 = _foreach_pow[52] + getitem_1089 = _foreach_pow[53] + getitem_1090 = _foreach_pow[54] + getitem_1091 = _foreach_pow[55] + getitem_1092 = _foreach_pow[56] + getitem_1093 = _foreach_pow[57] + 
getitem_1094 = _foreach_pow[58] + getitem_1095 = _foreach_pow[59] + getitem_1096 = _foreach_pow[60] + getitem_1097 = _foreach_pow[61] + getitem_1098 = _foreach_pow[62] + getitem_1099 = _foreach_pow[63] + getitem_1100 = _foreach_pow[64] + getitem_1101 = _foreach_pow[65] + getitem_1102 = _foreach_pow[66] + getitem_1103 = _foreach_pow[67] + getitem_1104 = _foreach_pow[68] + getitem_1105 = _foreach_pow[69] + getitem_1106 = _foreach_pow[70] + getitem_1107 = _foreach_pow[71] + getitem_1108 = _foreach_pow[72] + getitem_1109 = _foreach_pow[73] + getitem_1110 = _foreach_pow[74] + getitem_1111 = _foreach_pow[75] + getitem_1112 = _foreach_pow[76] + getitem_1113 = _foreach_pow[77] + getitem_1114 = _foreach_pow[78] + getitem_1115 = _foreach_pow[79] + getitem_1116 = _foreach_pow[80] + getitem_1117 = _foreach_pow[81] + getitem_1118 = _foreach_pow[82] + getitem_1119 = _foreach_pow[83] + getitem_1120 = _foreach_pow[84] + getitem_1121 = _foreach_pow[85] + getitem_1122 = _foreach_pow[86] + getitem_1123 = _foreach_pow[87] + getitem_1124 = _foreach_pow[88] + getitem_1125 = _foreach_pow[89] + getitem_1126 = _foreach_pow[90] + getitem_1127 = _foreach_pow[91] + getitem_1128 = _foreach_pow[92] + getitem_1129 = _foreach_pow[93] + getitem_1130 = _foreach_pow[94] + getitem_1131 = _foreach_pow[95] + getitem_1132 = _foreach_pow[96] + getitem_1133 = _foreach_pow[97] + getitem_1134 = _foreach_pow[98] + getitem_1135 = _foreach_pow[99] + getitem_1136 = _foreach_pow[100] + getitem_1137 = _foreach_pow[101] + getitem_1138 = _foreach_pow[102] + getitem_1139 = _foreach_pow[103] + getitem_1140 = _foreach_pow[104] + getitem_1141 = _foreach_pow[105] + getitem_1142 = _foreach_pow[106] + getitem_1143 = _foreach_pow[107] + getitem_1144 = _foreach_pow[108] + getitem_1145 = _foreach_pow[109] + getitem_1146 = _foreach_pow[110] + getitem_1147 = _foreach_pow[111] + getitem_1148 = _foreach_pow[112] + getitem_1149 = _foreach_pow[113] + getitem_1150 = _foreach_pow[114] + getitem_1151 = _foreach_pow[115] + getitem_1152 = _foreach_pow[116] + getitem_1153 = _foreach_pow[117] + getitem_1154 = _foreach_pow[118] + getitem_1155 = _foreach_pow[119] + getitem_1156 = _foreach_pow[120] + getitem_1157 = _foreach_pow[121] + getitem_1158 = _foreach_pow[122] + getitem_1159 = _foreach_pow[123] + getitem_1160 = _foreach_pow[124] + getitem_1161 = _foreach_pow[125] + getitem_1162 = _foreach_pow[126] + getitem_1163 = _foreach_pow[127] + getitem_1164 = _foreach_pow[128] + getitem_1165 = _foreach_pow[129] + getitem_1166 = _foreach_pow[130] + getitem_1167 = _foreach_pow[131] + getitem_1168 = _foreach_pow[132] + getitem_1169 = _foreach_pow[133] + getitem_1170 = _foreach_pow[134] + getitem_1171 = _foreach_pow[135] + getitem_1172 = _foreach_pow[136] + getitem_1173 = _foreach_pow[137] + getitem_1174 = _foreach_pow[138] + getitem_1175 = _foreach_pow[139] + getitem_1176 = _foreach_pow[140] + getitem_1177 = _foreach_pow[141] + getitem_1178 = _foreach_pow[142] + getitem_1179 = _foreach_pow[143] + getitem_1180 = _foreach_pow[144] + getitem_1181 = _foreach_pow[145] + getitem_1182 = _foreach_pow[146] + getitem_1183 = _foreach_pow[147]; _foreach_pow = None + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, 
getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1184 = _foreach_pow_1[0] + getitem_1185 = _foreach_pow_1[1] + getitem_1186 = _foreach_pow_1[2] + getitem_1187 = _foreach_pow_1[3] + getitem_1188 = _foreach_pow_1[4] + getitem_1189 = _foreach_pow_1[5] + getitem_1190 = _foreach_pow_1[6] + getitem_1191 = _foreach_pow_1[7] + getitem_1192 = _foreach_pow_1[8] + getitem_1193 = _foreach_pow_1[9] + getitem_1194 = _foreach_pow_1[10] + getitem_1195 = _foreach_pow_1[11] + getitem_1196 = _foreach_pow_1[12] + getitem_1197 = _foreach_pow_1[13] + getitem_1198 = _foreach_pow_1[14] + getitem_1199 = _foreach_pow_1[15] + getitem_1200 = _foreach_pow_1[16] + getitem_1201 = _foreach_pow_1[17] + getitem_1202 = _foreach_pow_1[18] + getitem_1203 = _foreach_pow_1[19] + getitem_1204 = _foreach_pow_1[20] + getitem_1205 = _foreach_pow_1[21] + getitem_1206 = _foreach_pow_1[22] + getitem_1207 = _foreach_pow_1[23] + getitem_1208 = _foreach_pow_1[24] + getitem_1209 = _foreach_pow_1[25] + getitem_1210 = _foreach_pow_1[26] + getitem_1211 = _foreach_pow_1[27] + getitem_1212 = _foreach_pow_1[28] + getitem_1213 = _foreach_pow_1[29] + getitem_1214 = _foreach_pow_1[30] + getitem_1215 = _foreach_pow_1[31] + getitem_1216 = _foreach_pow_1[32] + getitem_1217 = _foreach_pow_1[33] + getitem_1218 = _foreach_pow_1[34] + getitem_1219 = _foreach_pow_1[35] + getitem_1220 = _foreach_pow_1[36] + getitem_1221 = _foreach_pow_1[37] + getitem_1222 = _foreach_pow_1[38] + getitem_1223 = _foreach_pow_1[39] + getitem_1224 = _foreach_pow_1[40] + getitem_1225 = _foreach_pow_1[41] + getitem_1226 = _foreach_pow_1[42] + getitem_1227 = _foreach_pow_1[43] + getitem_1228 = _foreach_pow_1[44] + getitem_1229 = _foreach_pow_1[45] + getitem_1230 = _foreach_pow_1[46] + getitem_1231 = _foreach_pow_1[47] + getitem_1232 = _foreach_pow_1[48] + getitem_1233 = _foreach_pow_1[49] + getitem_1234 = _foreach_pow_1[50] + getitem_1235 = _foreach_pow_1[51] + getitem_1236 = _foreach_pow_1[52] + getitem_1237 = _foreach_pow_1[53] + getitem_1238 = _foreach_pow_1[54] + getitem_1239 = _foreach_pow_1[55] + getitem_1240 = _foreach_pow_1[56] + getitem_1241 = _foreach_pow_1[57] + 
getitem_1242 = _foreach_pow_1[58] + getitem_1243 = _foreach_pow_1[59] + getitem_1244 = _foreach_pow_1[60] + getitem_1245 = _foreach_pow_1[61] + getitem_1246 = _foreach_pow_1[62] + getitem_1247 = _foreach_pow_1[63] + getitem_1248 = _foreach_pow_1[64] + getitem_1249 = _foreach_pow_1[65] + getitem_1250 = _foreach_pow_1[66] + getitem_1251 = _foreach_pow_1[67] + getitem_1252 = _foreach_pow_1[68] + getitem_1253 = _foreach_pow_1[69] + getitem_1254 = _foreach_pow_1[70] + getitem_1255 = _foreach_pow_1[71] + getitem_1256 = _foreach_pow_1[72] + getitem_1257 = _foreach_pow_1[73] + getitem_1258 = _foreach_pow_1[74] + getitem_1259 = _foreach_pow_1[75] + getitem_1260 = _foreach_pow_1[76] + getitem_1261 = _foreach_pow_1[77] + getitem_1262 = _foreach_pow_1[78] + getitem_1263 = _foreach_pow_1[79] + getitem_1264 = _foreach_pow_1[80] + getitem_1265 = _foreach_pow_1[81] + getitem_1266 = _foreach_pow_1[82] + getitem_1267 = _foreach_pow_1[83] + getitem_1268 = _foreach_pow_1[84] + getitem_1269 = _foreach_pow_1[85] + getitem_1270 = _foreach_pow_1[86] + getitem_1271 = _foreach_pow_1[87] + getitem_1272 = _foreach_pow_1[88] + getitem_1273 = _foreach_pow_1[89] + getitem_1274 = _foreach_pow_1[90] + getitem_1275 = _foreach_pow_1[91] + getitem_1276 = _foreach_pow_1[92] + getitem_1277 = _foreach_pow_1[93] + getitem_1278 = _foreach_pow_1[94] + getitem_1279 = _foreach_pow_1[95] + getitem_1280 = _foreach_pow_1[96] + getitem_1281 = _foreach_pow_1[97] + getitem_1282 = _foreach_pow_1[98] + getitem_1283 = _foreach_pow_1[99] + getitem_1284 = _foreach_pow_1[100] + getitem_1285 = _foreach_pow_1[101] + getitem_1286 = _foreach_pow_1[102] + getitem_1287 = _foreach_pow_1[103] + getitem_1288 = _foreach_pow_1[104] + getitem_1289 = _foreach_pow_1[105] + getitem_1290 = _foreach_pow_1[106] + getitem_1291 = _foreach_pow_1[107] + getitem_1292 = _foreach_pow_1[108] + getitem_1293 = _foreach_pow_1[109] + getitem_1294 = _foreach_pow_1[110] + getitem_1295 = _foreach_pow_1[111] + getitem_1296 = _foreach_pow_1[112] + getitem_1297 = _foreach_pow_1[113] + getitem_1298 = _foreach_pow_1[114] + getitem_1299 = _foreach_pow_1[115] + getitem_1300 = _foreach_pow_1[116] + getitem_1301 = _foreach_pow_1[117] + getitem_1302 = _foreach_pow_1[118] + getitem_1303 = _foreach_pow_1[119] + getitem_1304 = _foreach_pow_1[120] + getitem_1305 = _foreach_pow_1[121] + getitem_1306 = _foreach_pow_1[122] + getitem_1307 = _foreach_pow_1[123] + getitem_1308 = _foreach_pow_1[124] + getitem_1309 = _foreach_pow_1[125] + getitem_1310 = _foreach_pow_1[126] + getitem_1311 = _foreach_pow_1[127] + getitem_1312 = _foreach_pow_1[128] + getitem_1313 = _foreach_pow_1[129] + getitem_1314 = _foreach_pow_1[130] + getitem_1315 = _foreach_pow_1[131] + getitem_1316 = _foreach_pow_1[132] + getitem_1317 = _foreach_pow_1[133] + getitem_1318 = _foreach_pow_1[134] + getitem_1319 = _foreach_pow_1[135] + getitem_1320 = _foreach_pow_1[136] + getitem_1321 = _foreach_pow_1[137] + getitem_1322 = _foreach_pow_1[138] + getitem_1323 = _foreach_pow_1[139] + getitem_1324 = _foreach_pow_1[140] + getitem_1325 = _foreach_pow_1[141] + getitem_1326 = _foreach_pow_1[142] + getitem_1327 = _foreach_pow_1[143] + getitem_1328 = _foreach_pow_1[144] + getitem_1329 = _foreach_pow_1[145] + getitem_1330 = _foreach_pow_1[146] + getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, 
getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = 
getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332 = _foreach_sub_1[0] + getitem_1333 = _foreach_sub_1[1] + getitem_1334 = _foreach_sub_1[2] + getitem_1335 = _foreach_sub_1[3] + getitem_1336 = _foreach_sub_1[4] + getitem_1337 = _foreach_sub_1[5] + getitem_1338 = _foreach_sub_1[6] + getitem_1339 = _foreach_sub_1[7] + getitem_1340 = _foreach_sub_1[8] + getitem_1341 = _foreach_sub_1[9] + getitem_1342 = _foreach_sub_1[10] + getitem_1343 = _foreach_sub_1[11] + getitem_1344 = _foreach_sub_1[12] + getitem_1345 = _foreach_sub_1[13] + getitem_1346 = _foreach_sub_1[14] + getitem_1347 = _foreach_sub_1[15] + getitem_1348 = _foreach_sub_1[16] + getitem_1349 = _foreach_sub_1[17] + getitem_1350 = _foreach_sub_1[18] + getitem_1351 = _foreach_sub_1[19] + getitem_1352 = _foreach_sub_1[20] + getitem_1353 = _foreach_sub_1[21] + getitem_1354 = _foreach_sub_1[22] + getitem_1355 = _foreach_sub_1[23] + getitem_1356 = _foreach_sub_1[24] + getitem_1357 = _foreach_sub_1[25] + getitem_1358 = _foreach_sub_1[26] + getitem_1359 = _foreach_sub_1[27] + getitem_1360 = _foreach_sub_1[28] + getitem_1361 = _foreach_sub_1[29] + getitem_1362 = _foreach_sub_1[30] + getitem_1363 = _foreach_sub_1[31] + getitem_1364 = _foreach_sub_1[32] + getitem_1365 = _foreach_sub_1[33] + getitem_1366 = _foreach_sub_1[34] + getitem_1367 = _foreach_sub_1[35] + getitem_1368 = _foreach_sub_1[36] + getitem_1369 = _foreach_sub_1[37] + getitem_1370 = _foreach_sub_1[38] + getitem_1371 = _foreach_sub_1[39] + getitem_1372 = _foreach_sub_1[40] + getitem_1373 = _foreach_sub_1[41] + getitem_1374 = _foreach_sub_1[42] + getitem_1375 = _foreach_sub_1[43] + getitem_1376 = _foreach_sub_1[44] + getitem_1377 = _foreach_sub_1[45] + getitem_1378 = _foreach_sub_1[46] + getitem_1379 = _foreach_sub_1[47] + getitem_1380 = _foreach_sub_1[48] + getitem_1381 = _foreach_sub_1[49] + getitem_1382 = _foreach_sub_1[50] + getitem_1383 = _foreach_sub_1[51] + getitem_1384 = _foreach_sub_1[52] + getitem_1385 = _foreach_sub_1[53] + getitem_1386 = _foreach_sub_1[54] + getitem_1387 = _foreach_sub_1[55] + getitem_1388 = _foreach_sub_1[56] + getitem_1389 = _foreach_sub_1[57] + getitem_1390 = _foreach_sub_1[58] + getitem_1391 = _foreach_sub_1[59] + getitem_1392 = _foreach_sub_1[60] + getitem_1393 = _foreach_sub_1[61] + getitem_1394 = _foreach_sub_1[62] + getitem_1395 = _foreach_sub_1[63] + getitem_1396 = _foreach_sub_1[64] + getitem_1397 = _foreach_sub_1[65] + getitem_1398 = _foreach_sub_1[66] + getitem_1399 = _foreach_sub_1[67] + getitem_1400 = _foreach_sub_1[68] + getitem_1401 = _foreach_sub_1[69] + getitem_1402 = _foreach_sub_1[70] + getitem_1403 = _foreach_sub_1[71] + getitem_1404 = _foreach_sub_1[72] + getitem_1405 = _foreach_sub_1[73] + getitem_1406 = _foreach_sub_1[74] + getitem_1407 = _foreach_sub_1[75] + getitem_1408 = _foreach_sub_1[76] + getitem_1409 = _foreach_sub_1[77] + getitem_1410 = _foreach_sub_1[78] + getitem_1411 = _foreach_sub_1[79] + getitem_1412 = _foreach_sub_1[80] + getitem_1413 = _foreach_sub_1[81] + getitem_1414 = _foreach_sub_1[82] + 
getitem_1415 = _foreach_sub_1[83] + getitem_1416 = _foreach_sub_1[84] + getitem_1417 = _foreach_sub_1[85] + getitem_1418 = _foreach_sub_1[86] + getitem_1419 = _foreach_sub_1[87] + getitem_1420 = _foreach_sub_1[88] + getitem_1421 = _foreach_sub_1[89] + getitem_1422 = _foreach_sub_1[90] + getitem_1423 = _foreach_sub_1[91] + getitem_1424 = _foreach_sub_1[92] + getitem_1425 = _foreach_sub_1[93] + getitem_1426 = _foreach_sub_1[94] + getitem_1427 = _foreach_sub_1[95] + getitem_1428 = _foreach_sub_1[96] + getitem_1429 = _foreach_sub_1[97] + getitem_1430 = _foreach_sub_1[98] + getitem_1431 = _foreach_sub_1[99] + getitem_1432 = _foreach_sub_1[100] + getitem_1433 = _foreach_sub_1[101] + getitem_1434 = _foreach_sub_1[102] + getitem_1435 = _foreach_sub_1[103] + getitem_1436 = _foreach_sub_1[104] + getitem_1437 = _foreach_sub_1[105] + getitem_1438 = _foreach_sub_1[106] + getitem_1439 = _foreach_sub_1[107] + getitem_1440 = _foreach_sub_1[108] + getitem_1441 = _foreach_sub_1[109] + getitem_1442 = _foreach_sub_1[110] + getitem_1443 = _foreach_sub_1[111] + getitem_1444 = _foreach_sub_1[112] + getitem_1445 = _foreach_sub_1[113] + getitem_1446 = _foreach_sub_1[114] + getitem_1447 = _foreach_sub_1[115] + getitem_1448 = _foreach_sub_1[116] + getitem_1449 = _foreach_sub_1[117] + getitem_1450 = _foreach_sub_1[118] + getitem_1451 = _foreach_sub_1[119] + getitem_1452 = _foreach_sub_1[120] + getitem_1453 = _foreach_sub_1[121] + getitem_1454 = _foreach_sub_1[122] + getitem_1455 = _foreach_sub_1[123] + getitem_1456 = _foreach_sub_1[124] + getitem_1457 = _foreach_sub_1[125] + getitem_1458 = _foreach_sub_1[126] + getitem_1459 = _foreach_sub_1[127] + getitem_1460 = _foreach_sub_1[128] + getitem_1461 = _foreach_sub_1[129] + getitem_1462 = _foreach_sub_1[130] + getitem_1463 = _foreach_sub_1[131] + getitem_1464 = _foreach_sub_1[132] + getitem_1465 = _foreach_sub_1[133] + getitem_1466 = _foreach_sub_1[134] + getitem_1467 = _foreach_sub_1[135] + getitem_1468 = _foreach_sub_1[136] + getitem_1469 = _foreach_sub_1[137] + getitem_1470 = _foreach_sub_1[138] + getitem_1471 = _foreach_sub_1[139] + getitem_1472 = _foreach_sub_1[140] + getitem_1473 = _foreach_sub_1[141] + getitem_1474 = _foreach_sub_1[142] + getitem_1475 = _foreach_sub_1[143] + getitem_1476 = _foreach_sub_1[144] + getitem_1477 = _foreach_sub_1[145] + getitem_1478 = _foreach_sub_1[146] + getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, 
getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480 = _foreach_sub_2[0] + getitem_1481 = _foreach_sub_2[1] + getitem_1482 = _foreach_sub_2[2] + getitem_1483 = _foreach_sub_2[3] + getitem_1484 = _foreach_sub_2[4] + getitem_1485 = _foreach_sub_2[5] + getitem_1486 = _foreach_sub_2[6] + getitem_1487 = _foreach_sub_2[7] + getitem_1488 = _foreach_sub_2[8] + getitem_1489 
= _foreach_sub_2[9] + getitem_1490 = _foreach_sub_2[10] + getitem_1491 = _foreach_sub_2[11] + getitem_1492 = _foreach_sub_2[12] + getitem_1493 = _foreach_sub_2[13] + getitem_1494 = _foreach_sub_2[14] + getitem_1495 = _foreach_sub_2[15] + getitem_1496 = _foreach_sub_2[16] + getitem_1497 = _foreach_sub_2[17] + getitem_1498 = _foreach_sub_2[18] + getitem_1499 = _foreach_sub_2[19] + getitem_1500 = _foreach_sub_2[20] + getitem_1501 = _foreach_sub_2[21] + getitem_1502 = _foreach_sub_2[22] + getitem_1503 = _foreach_sub_2[23] + getitem_1504 = _foreach_sub_2[24] + getitem_1505 = _foreach_sub_2[25] + getitem_1506 = _foreach_sub_2[26] + getitem_1507 = _foreach_sub_2[27] + getitem_1508 = _foreach_sub_2[28] + getitem_1509 = _foreach_sub_2[29] + getitem_1510 = _foreach_sub_2[30] + getitem_1511 = _foreach_sub_2[31] + getitem_1512 = _foreach_sub_2[32] + getitem_1513 = _foreach_sub_2[33] + getitem_1514 = _foreach_sub_2[34] + getitem_1515 = _foreach_sub_2[35] + getitem_1516 = _foreach_sub_2[36] + getitem_1517 = _foreach_sub_2[37] + getitem_1518 = _foreach_sub_2[38] + getitem_1519 = _foreach_sub_2[39] + getitem_1520 = _foreach_sub_2[40] + getitem_1521 = _foreach_sub_2[41] + getitem_1522 = _foreach_sub_2[42] + getitem_1523 = _foreach_sub_2[43] + getitem_1524 = _foreach_sub_2[44] + getitem_1525 = _foreach_sub_2[45] + getitem_1526 = _foreach_sub_2[46] + getitem_1527 = _foreach_sub_2[47] + getitem_1528 = _foreach_sub_2[48] + getitem_1529 = _foreach_sub_2[49] + getitem_1530 = _foreach_sub_2[50] + getitem_1531 = _foreach_sub_2[51] + getitem_1532 = _foreach_sub_2[52] + getitem_1533 = _foreach_sub_2[53] + getitem_1534 = _foreach_sub_2[54] + getitem_1535 = _foreach_sub_2[55] + getitem_1536 = _foreach_sub_2[56] + getitem_1537 = _foreach_sub_2[57] + getitem_1538 = _foreach_sub_2[58] + getitem_1539 = _foreach_sub_2[59] + getitem_1540 = _foreach_sub_2[60] + getitem_1541 = _foreach_sub_2[61] + getitem_1542 = _foreach_sub_2[62] + getitem_1543 = _foreach_sub_2[63] + getitem_1544 = _foreach_sub_2[64] + getitem_1545 = _foreach_sub_2[65] + getitem_1546 = _foreach_sub_2[66] + getitem_1547 = _foreach_sub_2[67] + getitem_1548 = _foreach_sub_2[68] + getitem_1549 = _foreach_sub_2[69] + getitem_1550 = _foreach_sub_2[70] + getitem_1551 = _foreach_sub_2[71] + getitem_1552 = _foreach_sub_2[72] + getitem_1553 = _foreach_sub_2[73] + getitem_1554 = _foreach_sub_2[74] + getitem_1555 = _foreach_sub_2[75] + getitem_1556 = _foreach_sub_2[76] + getitem_1557 = _foreach_sub_2[77] + getitem_1558 = _foreach_sub_2[78] + getitem_1559 = _foreach_sub_2[79] + getitem_1560 = _foreach_sub_2[80] + getitem_1561 = _foreach_sub_2[81] + getitem_1562 = _foreach_sub_2[82] + getitem_1563 = _foreach_sub_2[83] + getitem_1564 = _foreach_sub_2[84] + getitem_1565 = _foreach_sub_2[85] + getitem_1566 = _foreach_sub_2[86] + getitem_1567 = _foreach_sub_2[87] + getitem_1568 = _foreach_sub_2[88] + getitem_1569 = _foreach_sub_2[89] + getitem_1570 = _foreach_sub_2[90] + getitem_1571 = _foreach_sub_2[91] + getitem_1572 = _foreach_sub_2[92] + getitem_1573 = _foreach_sub_2[93] + getitem_1574 = _foreach_sub_2[94] + getitem_1575 = _foreach_sub_2[95] + getitem_1576 = _foreach_sub_2[96] + getitem_1577 = _foreach_sub_2[97] + getitem_1578 = _foreach_sub_2[98] + getitem_1579 = _foreach_sub_2[99] + getitem_1580 = _foreach_sub_2[100] + getitem_1581 = _foreach_sub_2[101] + getitem_1582 = _foreach_sub_2[102] + getitem_1583 = _foreach_sub_2[103] + getitem_1584 = _foreach_sub_2[104] + getitem_1585 = _foreach_sub_2[105] + getitem_1586 = _foreach_sub_2[106] + getitem_1587 = 
_foreach_sub_2[107] + getitem_1588 = _foreach_sub_2[108] + getitem_1589 = _foreach_sub_2[109] + getitem_1590 = _foreach_sub_2[110] + getitem_1591 = _foreach_sub_2[111] + getitem_1592 = _foreach_sub_2[112] + getitem_1593 = _foreach_sub_2[113] + getitem_1594 = _foreach_sub_2[114] + getitem_1595 = _foreach_sub_2[115] + getitem_1596 = _foreach_sub_2[116] + getitem_1597 = _foreach_sub_2[117] + getitem_1598 = _foreach_sub_2[118] + getitem_1599 = _foreach_sub_2[119] + getitem_1600 = _foreach_sub_2[120] + getitem_1601 = _foreach_sub_2[121] + getitem_1602 = _foreach_sub_2[122] + getitem_1603 = _foreach_sub_2[123] + getitem_1604 = _foreach_sub_2[124] + getitem_1605 = _foreach_sub_2[125] + getitem_1606 = _foreach_sub_2[126] + getitem_1607 = _foreach_sub_2[127] + getitem_1608 = _foreach_sub_2[128] + getitem_1609 = _foreach_sub_2[129] + getitem_1610 = _foreach_sub_2[130] + getitem_1611 = _foreach_sub_2[131] + getitem_1612 = _foreach_sub_2[132] + getitem_1613 = _foreach_sub_2[133] + getitem_1614 = _foreach_sub_2[134] + getitem_1615 = _foreach_sub_2[135] + getitem_1616 = _foreach_sub_2[136] + getitem_1617 = _foreach_sub_2[137] + getitem_1618 = _foreach_sub_2[138] + getitem_1619 = _foreach_sub_2[139] + getitem_1620 = _foreach_sub_2[140] + getitem_1621 = _foreach_sub_2[141] + getitem_1622 = _foreach_sub_2[142] + getitem_1623 = _foreach_sub_2[143] + getitem_1624 = _foreach_sub_2[144] + getitem_1625 = _foreach_sub_2[145] + getitem_1626 = _foreach_sub_2[146] + getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, 
getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628 = _foreach_neg[0] + getitem_1629 = _foreach_neg[1] + getitem_1630 = _foreach_neg[2] + getitem_1631 = _foreach_neg[3] + getitem_1632 = _foreach_neg[4] + getitem_1633 = _foreach_neg[5] + getitem_1634 = _foreach_neg[6] + getitem_1635 = _foreach_neg[7] + getitem_1636 = _foreach_neg[8] + getitem_1637 = _foreach_neg[9] + getitem_1638 = _foreach_neg[10] + getitem_1639 = _foreach_neg[11] + getitem_1640 = _foreach_neg[12] + getitem_1641 = _foreach_neg[13] + getitem_1642 = _foreach_neg[14] + getitem_1643 = _foreach_neg[15] + getitem_1644 = _foreach_neg[16] + getitem_1645 = _foreach_neg[17] + getitem_1646 = _foreach_neg[18] + getitem_1647 = _foreach_neg[19] + getitem_1648 = _foreach_neg[20] + getitem_1649 = _foreach_neg[21] + getitem_1650 = _foreach_neg[22] + getitem_1651 = _foreach_neg[23] + getitem_1652 = _foreach_neg[24] + getitem_1653 = _foreach_neg[25] + getitem_1654 = _foreach_neg[26] + getitem_1655 = _foreach_neg[27] + getitem_1656 = _foreach_neg[28] + getitem_1657 = _foreach_neg[29] + getitem_1658 = _foreach_neg[30] + getitem_1659 = _foreach_neg[31] + getitem_1660 = _foreach_neg[32] + getitem_1661 = _foreach_neg[33] + getitem_1662 = _foreach_neg[34] + getitem_1663 = _foreach_neg[35] + getitem_1664 
= _foreach_neg[36] + getitem_1665 = _foreach_neg[37] + getitem_1666 = _foreach_neg[38] + getitem_1667 = _foreach_neg[39] + getitem_1668 = _foreach_neg[40] + getitem_1669 = _foreach_neg[41] + getitem_1670 = _foreach_neg[42] + getitem_1671 = _foreach_neg[43] + getitem_1672 = _foreach_neg[44] + getitem_1673 = _foreach_neg[45] + getitem_1674 = _foreach_neg[46] + getitem_1675 = _foreach_neg[47] + getitem_1676 = _foreach_neg[48] + getitem_1677 = _foreach_neg[49] + getitem_1678 = _foreach_neg[50] + getitem_1679 = _foreach_neg[51] + getitem_1680 = _foreach_neg[52] + getitem_1681 = _foreach_neg[53] + getitem_1682 = _foreach_neg[54] + getitem_1683 = _foreach_neg[55] + getitem_1684 = _foreach_neg[56] + getitem_1685 = _foreach_neg[57] + getitem_1686 = _foreach_neg[58] + getitem_1687 = _foreach_neg[59] + getitem_1688 = _foreach_neg[60] + getitem_1689 = _foreach_neg[61] + getitem_1690 = _foreach_neg[62] + getitem_1691 = _foreach_neg[63] + getitem_1692 = _foreach_neg[64] + getitem_1693 = _foreach_neg[65] + getitem_1694 = _foreach_neg[66] + getitem_1695 = _foreach_neg[67] + getitem_1696 = _foreach_neg[68] + getitem_1697 = _foreach_neg[69] + getitem_1698 = _foreach_neg[70] + getitem_1699 = _foreach_neg[71] + getitem_1700 = _foreach_neg[72] + getitem_1701 = _foreach_neg[73] + getitem_1702 = _foreach_neg[74] + getitem_1703 = _foreach_neg[75] + getitem_1704 = _foreach_neg[76] + getitem_1705 = _foreach_neg[77] + getitem_1706 = _foreach_neg[78] + getitem_1707 = _foreach_neg[79] + getitem_1708 = _foreach_neg[80] + getitem_1709 = _foreach_neg[81] + getitem_1710 = _foreach_neg[82] + getitem_1711 = _foreach_neg[83] + getitem_1712 = _foreach_neg[84] + getitem_1713 = _foreach_neg[85] + getitem_1714 = _foreach_neg[86] + getitem_1715 = _foreach_neg[87] + getitem_1716 = _foreach_neg[88] + getitem_1717 = _foreach_neg[89] + getitem_1718 = _foreach_neg[90] + getitem_1719 = _foreach_neg[91] + getitem_1720 = _foreach_neg[92] + getitem_1721 = _foreach_neg[93] + getitem_1722 = _foreach_neg[94] + getitem_1723 = _foreach_neg[95] + getitem_1724 = _foreach_neg[96] + getitem_1725 = _foreach_neg[97] + getitem_1726 = _foreach_neg[98] + getitem_1727 = _foreach_neg[99] + getitem_1728 = _foreach_neg[100] + getitem_1729 = _foreach_neg[101] + getitem_1730 = _foreach_neg[102] + getitem_1731 = _foreach_neg[103] + getitem_1732 = _foreach_neg[104] + getitem_1733 = _foreach_neg[105] + getitem_1734 = _foreach_neg[106] + getitem_1735 = _foreach_neg[107] + getitem_1736 = _foreach_neg[108] + getitem_1737 = _foreach_neg[109] + getitem_1738 = _foreach_neg[110] + getitem_1739 = _foreach_neg[111] + getitem_1740 = _foreach_neg[112] + getitem_1741 = _foreach_neg[113] + getitem_1742 = _foreach_neg[114] + getitem_1743 = _foreach_neg[115] + getitem_1744 = _foreach_neg[116] + getitem_1745 = _foreach_neg[117] + getitem_1746 = _foreach_neg[118] + getitem_1747 = _foreach_neg[119] + getitem_1748 = _foreach_neg[120] + getitem_1749 = _foreach_neg[121] + getitem_1750 = _foreach_neg[122] + getitem_1751 = _foreach_neg[123] + getitem_1752 = _foreach_neg[124] + getitem_1753 = _foreach_neg[125] + getitem_1754 = _foreach_neg[126] + getitem_1755 = _foreach_neg[127] + getitem_1756 = _foreach_neg[128] + getitem_1757 = _foreach_neg[129] + getitem_1758 = _foreach_neg[130] + getitem_1759 = _foreach_neg[131] + getitem_1760 = _foreach_neg[132] + getitem_1761 = _foreach_neg[133] + getitem_1762 = _foreach_neg[134] + getitem_1763 = _foreach_neg[135] + getitem_1764 = _foreach_neg[136] + getitem_1765 = _foreach_neg[137] + getitem_1766 = _foreach_neg[138] + getitem_1767 = 
_foreach_neg[139] + getitem_1768 = _foreach_neg[140] + getitem_1769 = _foreach_neg[141] + getitem_1770 = _foreach_neg[142] + getitem_1771 = _foreach_neg[143] + getitem_1772 = _foreach_neg[144] + getitem_1773 = _foreach_neg[145] + getitem_1774 = _foreach_neg[146] + getitem_1775 = _foreach_neg[147]; _foreach_neg = None + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = 
getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776 = _foreach_div[0] + getitem_1777 = _foreach_div[1] + getitem_1778 = _foreach_div[2] + getitem_1779 = _foreach_div[3] + getitem_1780 = _foreach_div[4] + getitem_1781 = _foreach_div[5] + getitem_1782 = _foreach_div[6] + getitem_1783 = _foreach_div[7] + getitem_1784 = _foreach_div[8] + getitem_1785 = _foreach_div[9] + getitem_1786 = _foreach_div[10] + getitem_1787 = _foreach_div[11] + getitem_1788 = _foreach_div[12] + getitem_1789 = _foreach_div[13] + getitem_1790 = _foreach_div[14] + getitem_1791 = _foreach_div[15] + getitem_1792 = _foreach_div[16] + getitem_1793 = _foreach_div[17] + getitem_1794 = _foreach_div[18] + getitem_1795 = _foreach_div[19] + getitem_1796 = _foreach_div[20] + getitem_1797 = _foreach_div[21] + getitem_1798 = _foreach_div[22] + getitem_1799 = _foreach_div[23] + getitem_1800 = _foreach_div[24] + getitem_1801 = _foreach_div[25] + getitem_1802 = _foreach_div[26] + getitem_1803 = _foreach_div[27] + getitem_1804 = _foreach_div[28] + getitem_1805 = _foreach_div[29] + getitem_1806 = _foreach_div[30] + getitem_1807 = _foreach_div[31] + getitem_1808 = _foreach_div[32] + getitem_1809 = _foreach_div[33] + getitem_1810 = _foreach_div[34] + getitem_1811 = _foreach_div[35] + getitem_1812 = _foreach_div[36] + getitem_1813 = _foreach_div[37] + getitem_1814 = _foreach_div[38] + getitem_1815 = _foreach_div[39] + getitem_1816 = _foreach_div[40] + getitem_1817 = _foreach_div[41] + getitem_1818 = _foreach_div[42] + getitem_1819 = _foreach_div[43] + getitem_1820 = _foreach_div[44] + getitem_1821 = _foreach_div[45] + getitem_1822 = _foreach_div[46] + getitem_1823 = _foreach_div[47] + getitem_1824 = _foreach_div[48] + getitem_1825 = _foreach_div[49] + getitem_1826 = _foreach_div[50] + getitem_1827 = _foreach_div[51] + getitem_1828 = _foreach_div[52] + getitem_1829 = _foreach_div[53] + getitem_1830 = _foreach_div[54] + getitem_1831 = _foreach_div[55] + getitem_1832 = _foreach_div[56] + getitem_1833 = _foreach_div[57] + getitem_1834 = _foreach_div[58] + getitem_1835 = _foreach_div[59] + getitem_1836 = _foreach_div[60] + getitem_1837 = _foreach_div[61] + getitem_1838 = _foreach_div[62] + getitem_1839 = _foreach_div[63] + getitem_1840 = _foreach_div[64] + getitem_1841 = _foreach_div[65] + getitem_1842 = _foreach_div[66] + getitem_1843 = _foreach_div[67] + getitem_1844 = _foreach_div[68] + getitem_1845 = _foreach_div[69] + getitem_1846 = _foreach_div[70] + getitem_1847 = 
_foreach_div[71] + getitem_1848 = _foreach_div[72] + getitem_1849 = _foreach_div[73] + getitem_1850 = _foreach_div[74] + getitem_1851 = _foreach_div[75] + getitem_1852 = _foreach_div[76] + getitem_1853 = _foreach_div[77] + getitem_1854 = _foreach_div[78] + getitem_1855 = _foreach_div[79] + getitem_1856 = _foreach_div[80] + getitem_1857 = _foreach_div[81] + getitem_1858 = _foreach_div[82] + getitem_1859 = _foreach_div[83] + getitem_1860 = _foreach_div[84] + getitem_1861 = _foreach_div[85] + getitem_1862 = _foreach_div[86] + getitem_1863 = _foreach_div[87] + getitem_1864 = _foreach_div[88] + getitem_1865 = _foreach_div[89] + getitem_1866 = _foreach_div[90] + getitem_1867 = _foreach_div[91] + getitem_1868 = _foreach_div[92] + getitem_1869 = _foreach_div[93] + getitem_1870 = _foreach_div[94] + getitem_1871 = _foreach_div[95] + getitem_1872 = _foreach_div[96] + getitem_1873 = _foreach_div[97] + getitem_1874 = _foreach_div[98] + getitem_1875 = _foreach_div[99] + getitem_1876 = _foreach_div[100] + getitem_1877 = _foreach_div[101] + getitem_1878 = _foreach_div[102] + getitem_1879 = _foreach_div[103] + getitem_1880 = _foreach_div[104] + getitem_1881 = _foreach_div[105] + getitem_1882 = _foreach_div[106] + getitem_1883 = _foreach_div[107] + getitem_1884 = _foreach_div[108] + getitem_1885 = _foreach_div[109] + getitem_1886 = _foreach_div[110] + getitem_1887 = _foreach_div[111] + getitem_1888 = _foreach_div[112] + getitem_1889 = _foreach_div[113] + getitem_1890 = _foreach_div[114] + getitem_1891 = _foreach_div[115] + getitem_1892 = _foreach_div[116] + getitem_1893 = _foreach_div[117] + getitem_1894 = _foreach_div[118] + getitem_1895 = _foreach_div[119] + getitem_1896 = _foreach_div[120] + getitem_1897 = _foreach_div[121] + getitem_1898 = _foreach_div[122] + getitem_1899 = _foreach_div[123] + getitem_1900 = _foreach_div[124] + getitem_1901 = _foreach_div[125] + getitem_1902 = _foreach_div[126] + getitem_1903 = _foreach_div[127] + getitem_1904 = _foreach_div[128] + getitem_1905 = _foreach_div[129] + getitem_1906 = _foreach_div[130] + getitem_1907 = _foreach_div[131] + getitem_1908 = _foreach_div[132] + getitem_1909 = _foreach_div[133] + getitem_1910 = _foreach_div[134] + getitem_1911 = _foreach_div[135] + getitem_1912 = _foreach_div[136] + getitem_1913 = _foreach_div[137] + getitem_1914 = _foreach_div[138] + getitem_1915 = _foreach_div[139] + getitem_1916 = _foreach_div[140] + getitem_1917 = _foreach_div[141] + getitem_1918 = _foreach_div[142] + getitem_1919 = _foreach_div[143] + getitem_1920 = _foreach_div[144] + getitem_1921 = _foreach_div[145] + getitem_1922 = _foreach_div[146] + getitem_1923 = _foreach_div[147]; _foreach_div = None + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, 
getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924 = _foreach_reciprocal[0] + getitem_1925 = 
_foreach_reciprocal[1] + getitem_1926 = _foreach_reciprocal[2] + getitem_1927 = _foreach_reciprocal[3] + getitem_1928 = _foreach_reciprocal[4] + getitem_1929 = _foreach_reciprocal[5] + getitem_1930 = _foreach_reciprocal[6] + getitem_1931 = _foreach_reciprocal[7] + getitem_1932 = _foreach_reciprocal[8] + getitem_1933 = _foreach_reciprocal[9] + getitem_1934 = _foreach_reciprocal[10] + getitem_1935 = _foreach_reciprocal[11] + getitem_1936 = _foreach_reciprocal[12] + getitem_1937 = _foreach_reciprocal[13] + getitem_1938 = _foreach_reciprocal[14] + getitem_1939 = _foreach_reciprocal[15] + getitem_1940 = _foreach_reciprocal[16] + getitem_1941 = _foreach_reciprocal[17] + getitem_1942 = _foreach_reciprocal[18] + getitem_1943 = _foreach_reciprocal[19] + getitem_1944 = _foreach_reciprocal[20] + getitem_1945 = _foreach_reciprocal[21] + getitem_1946 = _foreach_reciprocal[22] + getitem_1947 = _foreach_reciprocal[23] + getitem_1948 = _foreach_reciprocal[24] + getitem_1949 = _foreach_reciprocal[25] + getitem_1950 = _foreach_reciprocal[26] + getitem_1951 = _foreach_reciprocal[27] + getitem_1952 = _foreach_reciprocal[28] + getitem_1953 = _foreach_reciprocal[29] + getitem_1954 = _foreach_reciprocal[30] + getitem_1955 = _foreach_reciprocal[31] + getitem_1956 = _foreach_reciprocal[32] + getitem_1957 = _foreach_reciprocal[33] + getitem_1958 = _foreach_reciprocal[34] + getitem_1959 = _foreach_reciprocal[35] + getitem_1960 = _foreach_reciprocal[36] + getitem_1961 = _foreach_reciprocal[37] + getitem_1962 = _foreach_reciprocal[38] + getitem_1963 = _foreach_reciprocal[39] + getitem_1964 = _foreach_reciprocal[40] + getitem_1965 = _foreach_reciprocal[41] + getitem_1966 = _foreach_reciprocal[42] + getitem_1967 = _foreach_reciprocal[43] + getitem_1968 = _foreach_reciprocal[44] + getitem_1969 = _foreach_reciprocal[45] + getitem_1970 = _foreach_reciprocal[46] + getitem_1971 = _foreach_reciprocal[47] + getitem_1972 = _foreach_reciprocal[48] + getitem_1973 = _foreach_reciprocal[49] + getitem_1974 = _foreach_reciprocal[50] + getitem_1975 = _foreach_reciprocal[51] + getitem_1976 = _foreach_reciprocal[52] + getitem_1977 = _foreach_reciprocal[53] + getitem_1978 = _foreach_reciprocal[54] + getitem_1979 = _foreach_reciprocal[55] + getitem_1980 = _foreach_reciprocal[56] + getitem_1981 = _foreach_reciprocal[57] + getitem_1982 = _foreach_reciprocal[58] + getitem_1983 = _foreach_reciprocal[59] + getitem_1984 = _foreach_reciprocal[60] + getitem_1985 = _foreach_reciprocal[61] + getitem_1986 = _foreach_reciprocal[62] + getitem_1987 = _foreach_reciprocal[63] + getitem_1988 = _foreach_reciprocal[64] + getitem_1989 = _foreach_reciprocal[65] + getitem_1990 = _foreach_reciprocal[66] + getitem_1991 = _foreach_reciprocal[67] + getitem_1992 = _foreach_reciprocal[68] + getitem_1993 = _foreach_reciprocal[69] + getitem_1994 = _foreach_reciprocal[70] + getitem_1995 = _foreach_reciprocal[71] + getitem_1996 = _foreach_reciprocal[72] + getitem_1997 = _foreach_reciprocal[73] + getitem_1998 = _foreach_reciprocal[74] + getitem_1999 = _foreach_reciprocal[75] + getitem_2000 = _foreach_reciprocal[76] + getitem_2001 = _foreach_reciprocal[77] + getitem_2002 = _foreach_reciprocal[78] + getitem_2003 = _foreach_reciprocal[79] + getitem_2004 = _foreach_reciprocal[80] + getitem_2005 = _foreach_reciprocal[81] + getitem_2006 = _foreach_reciprocal[82] + getitem_2007 = _foreach_reciprocal[83] + getitem_2008 = _foreach_reciprocal[84] + getitem_2009 = _foreach_reciprocal[85] + getitem_2010 = _foreach_reciprocal[86] + getitem_2011 = _foreach_reciprocal[87] + 
getitem_2012 = _foreach_reciprocal[88] + getitem_2013 = _foreach_reciprocal[89] + getitem_2014 = _foreach_reciprocal[90] + getitem_2015 = _foreach_reciprocal[91] + getitem_2016 = _foreach_reciprocal[92] + getitem_2017 = _foreach_reciprocal[93] + getitem_2018 = _foreach_reciprocal[94] + getitem_2019 = _foreach_reciprocal[95] + getitem_2020 = _foreach_reciprocal[96] + getitem_2021 = _foreach_reciprocal[97] + getitem_2022 = _foreach_reciprocal[98] + getitem_2023 = _foreach_reciprocal[99] + getitem_2024 = _foreach_reciprocal[100] + getitem_2025 = _foreach_reciprocal[101] + getitem_2026 = _foreach_reciprocal[102] + getitem_2027 = _foreach_reciprocal[103] + getitem_2028 = _foreach_reciprocal[104] + getitem_2029 = _foreach_reciprocal[105] + getitem_2030 = _foreach_reciprocal[106] + getitem_2031 = _foreach_reciprocal[107] + getitem_2032 = _foreach_reciprocal[108] + getitem_2033 = _foreach_reciprocal[109] + getitem_2034 = _foreach_reciprocal[110] + getitem_2035 = _foreach_reciprocal[111] + getitem_2036 = _foreach_reciprocal[112] + getitem_2037 = _foreach_reciprocal[113] + getitem_2038 = _foreach_reciprocal[114] + getitem_2039 = _foreach_reciprocal[115] + getitem_2040 = _foreach_reciprocal[116] + getitem_2041 = _foreach_reciprocal[117] + getitem_2042 = _foreach_reciprocal[118] + getitem_2043 = _foreach_reciprocal[119] + getitem_2044 = _foreach_reciprocal[120] + getitem_2045 = _foreach_reciprocal[121] + getitem_2046 = _foreach_reciprocal[122] + getitem_2047 = _foreach_reciprocal[123] + getitem_2048 = _foreach_reciprocal[124] + getitem_2049 = _foreach_reciprocal[125] + getitem_2050 = _foreach_reciprocal[126] + getitem_2051 = _foreach_reciprocal[127] + getitem_2052 = _foreach_reciprocal[128] + getitem_2053 = _foreach_reciprocal[129] + getitem_2054 = _foreach_reciprocal[130] + getitem_2055 = _foreach_reciprocal[131] + getitem_2056 = _foreach_reciprocal[132] + getitem_2057 = _foreach_reciprocal[133] + getitem_2058 = _foreach_reciprocal[134] + getitem_2059 = _foreach_reciprocal[135] + getitem_2060 = _foreach_reciprocal[136] + getitem_2061 = _foreach_reciprocal[137] + getitem_2062 = _foreach_reciprocal[138] + getitem_2063 = _foreach_reciprocal[139] + getitem_2064 = _foreach_reciprocal[140] + getitem_2065 = _foreach_reciprocal[141] + getitem_2066 = _foreach_reciprocal[142] + getitem_2067 = _foreach_reciprocal[143] + getitem_2068 = _foreach_reciprocal[144] + getitem_2069 = _foreach_reciprocal[145] + getitem_2070 = _foreach_reciprocal[146] + getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, 
getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072 = _foreach_sqrt[0] + getitem_2073 = _foreach_sqrt[1] + getitem_2074 = _foreach_sqrt[2] + getitem_2075 = _foreach_sqrt[3] + getitem_2076 = _foreach_sqrt[4] + getitem_2077 = _foreach_sqrt[5] + 
getitem_2078 = _foreach_sqrt[6] + getitem_2079 = _foreach_sqrt[7] + getitem_2080 = _foreach_sqrt[8] + getitem_2081 = _foreach_sqrt[9] + getitem_2082 = _foreach_sqrt[10] + getitem_2083 = _foreach_sqrt[11] + getitem_2084 = _foreach_sqrt[12] + getitem_2085 = _foreach_sqrt[13] + getitem_2086 = _foreach_sqrt[14] + getitem_2087 = _foreach_sqrt[15] + getitem_2088 = _foreach_sqrt[16] + getitem_2089 = _foreach_sqrt[17] + getitem_2090 = _foreach_sqrt[18] + getitem_2091 = _foreach_sqrt[19] + getitem_2092 = _foreach_sqrt[20] + getitem_2093 = _foreach_sqrt[21] + getitem_2094 = _foreach_sqrt[22] + getitem_2095 = _foreach_sqrt[23] + getitem_2096 = _foreach_sqrt[24] + getitem_2097 = _foreach_sqrt[25] + getitem_2098 = _foreach_sqrt[26] + getitem_2099 = _foreach_sqrt[27] + getitem_2100 = _foreach_sqrt[28] + getitem_2101 = _foreach_sqrt[29] + getitem_2102 = _foreach_sqrt[30] + getitem_2103 = _foreach_sqrt[31] + getitem_2104 = _foreach_sqrt[32] + getitem_2105 = _foreach_sqrt[33] + getitem_2106 = _foreach_sqrt[34] + getitem_2107 = _foreach_sqrt[35] + getitem_2108 = _foreach_sqrt[36] + getitem_2109 = _foreach_sqrt[37] + getitem_2110 = _foreach_sqrt[38] + getitem_2111 = _foreach_sqrt[39] + getitem_2112 = _foreach_sqrt[40] + getitem_2113 = _foreach_sqrt[41] + getitem_2114 = _foreach_sqrt[42] + getitem_2115 = _foreach_sqrt[43] + getitem_2116 = _foreach_sqrt[44] + getitem_2117 = _foreach_sqrt[45] + getitem_2118 = _foreach_sqrt[46] + getitem_2119 = _foreach_sqrt[47] + getitem_2120 = _foreach_sqrt[48] + getitem_2121 = _foreach_sqrt[49] + getitem_2122 = _foreach_sqrt[50] + getitem_2123 = _foreach_sqrt[51] + getitem_2124 = _foreach_sqrt[52] + getitem_2125 = _foreach_sqrt[53] + getitem_2126 = _foreach_sqrt[54] + getitem_2127 = _foreach_sqrt[55] + getitem_2128 = _foreach_sqrt[56] + getitem_2129 = _foreach_sqrt[57] + getitem_2130 = _foreach_sqrt[58] + getitem_2131 = _foreach_sqrt[59] + getitem_2132 = _foreach_sqrt[60] + getitem_2133 = _foreach_sqrt[61] + getitem_2134 = _foreach_sqrt[62] + getitem_2135 = _foreach_sqrt[63] + getitem_2136 = _foreach_sqrt[64] + getitem_2137 = _foreach_sqrt[65] + getitem_2138 = _foreach_sqrt[66] + getitem_2139 = _foreach_sqrt[67] + getitem_2140 = _foreach_sqrt[68] + getitem_2141 = _foreach_sqrt[69] + getitem_2142 = _foreach_sqrt[70] + getitem_2143 = _foreach_sqrt[71] + getitem_2144 = _foreach_sqrt[72] + getitem_2145 = _foreach_sqrt[73] + getitem_2146 = _foreach_sqrt[74] + getitem_2147 = _foreach_sqrt[75] + getitem_2148 = _foreach_sqrt[76] + getitem_2149 = _foreach_sqrt[77] + getitem_2150 = _foreach_sqrt[78] + getitem_2151 = _foreach_sqrt[79] + getitem_2152 = _foreach_sqrt[80] + getitem_2153 = _foreach_sqrt[81] + getitem_2154 = _foreach_sqrt[82] + getitem_2155 = _foreach_sqrt[83] + getitem_2156 = _foreach_sqrt[84] + getitem_2157 = _foreach_sqrt[85] + getitem_2158 = _foreach_sqrt[86] + getitem_2159 = _foreach_sqrt[87] + getitem_2160 = _foreach_sqrt[88] + getitem_2161 = _foreach_sqrt[89] + getitem_2162 = _foreach_sqrt[90] + getitem_2163 = _foreach_sqrt[91] + getitem_2164 = _foreach_sqrt[92] + getitem_2165 = _foreach_sqrt[93] + getitem_2166 = _foreach_sqrt[94] + getitem_2167 = _foreach_sqrt[95] + getitem_2168 = _foreach_sqrt[96] + getitem_2169 = _foreach_sqrt[97] + getitem_2170 = _foreach_sqrt[98] + getitem_2171 = _foreach_sqrt[99] + getitem_2172 = _foreach_sqrt[100] + getitem_2173 = _foreach_sqrt[101] + getitem_2174 = _foreach_sqrt[102] + getitem_2175 = _foreach_sqrt[103] + getitem_2176 = _foreach_sqrt[104] + getitem_2177 = _foreach_sqrt[105] + getitem_2178 = _foreach_sqrt[106] + getitem_2179 = 
_foreach_sqrt[107] + getitem_2180 = _foreach_sqrt[108] + getitem_2181 = _foreach_sqrt[109] + getitem_2182 = _foreach_sqrt[110] + getitem_2183 = _foreach_sqrt[111] + getitem_2184 = _foreach_sqrt[112] + getitem_2185 = _foreach_sqrt[113] + getitem_2186 = _foreach_sqrt[114] + getitem_2187 = _foreach_sqrt[115] + getitem_2188 = _foreach_sqrt[116] + getitem_2189 = _foreach_sqrt[117] + getitem_2190 = _foreach_sqrt[118] + getitem_2191 = _foreach_sqrt[119] + getitem_2192 = _foreach_sqrt[120] + getitem_2193 = _foreach_sqrt[121] + getitem_2194 = _foreach_sqrt[122] + getitem_2195 = _foreach_sqrt[123] + getitem_2196 = _foreach_sqrt[124] + getitem_2197 = _foreach_sqrt[125] + getitem_2198 = _foreach_sqrt[126] + getitem_2199 = _foreach_sqrt[127] + getitem_2200 = _foreach_sqrt[128] + getitem_2201 = _foreach_sqrt[129] + getitem_2202 = _foreach_sqrt[130] + getitem_2203 = _foreach_sqrt[131] + getitem_2204 = _foreach_sqrt[132] + getitem_2205 = _foreach_sqrt[133] + getitem_2206 = _foreach_sqrt[134] + getitem_2207 = _foreach_sqrt[135] + getitem_2208 = _foreach_sqrt[136] + getitem_2209 = _foreach_sqrt[137] + getitem_2210 = _foreach_sqrt[138] + getitem_2211 = _foreach_sqrt[139] + getitem_2212 = _foreach_sqrt[140] + getitem_2213 = _foreach_sqrt[141] + getitem_2214 = _foreach_sqrt[142] + getitem_2215 = _foreach_sqrt[143] + getitem_2216 = _foreach_sqrt[144] + getitem_2217 = _foreach_sqrt[145] + getitem_2218 = _foreach_sqrt[146] + getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220 = _foreach_sqrt_1[0] + getitem_2221 = 
_foreach_sqrt_1[1] + getitem_2222 = _foreach_sqrt_1[2] + getitem_2223 = _foreach_sqrt_1[3] + getitem_2224 = _foreach_sqrt_1[4] + getitem_2225 = _foreach_sqrt_1[5] + getitem_2226 = _foreach_sqrt_1[6] + getitem_2227 = _foreach_sqrt_1[7] + getitem_2228 = _foreach_sqrt_1[8] + getitem_2229 = _foreach_sqrt_1[9] + getitem_2230 = _foreach_sqrt_1[10] + getitem_2231 = _foreach_sqrt_1[11] + getitem_2232 = _foreach_sqrt_1[12] + getitem_2233 = _foreach_sqrt_1[13] + getitem_2234 = _foreach_sqrt_1[14] + getitem_2235 = _foreach_sqrt_1[15] + getitem_2236 = _foreach_sqrt_1[16] + getitem_2237 = _foreach_sqrt_1[17] + getitem_2238 = _foreach_sqrt_1[18] + getitem_2239 = _foreach_sqrt_1[19] + getitem_2240 = _foreach_sqrt_1[20] + getitem_2241 = _foreach_sqrt_1[21] + getitem_2242 = _foreach_sqrt_1[22] + getitem_2243 = _foreach_sqrt_1[23] + getitem_2244 = _foreach_sqrt_1[24] + getitem_2245 = _foreach_sqrt_1[25] + getitem_2246 = _foreach_sqrt_1[26] + getitem_2247 = _foreach_sqrt_1[27] + getitem_2248 = _foreach_sqrt_1[28] + getitem_2249 = _foreach_sqrt_1[29] + getitem_2250 = _foreach_sqrt_1[30] + getitem_2251 = _foreach_sqrt_1[31] + getitem_2252 = _foreach_sqrt_1[32] + getitem_2253 = _foreach_sqrt_1[33] + getitem_2254 = _foreach_sqrt_1[34] + getitem_2255 = _foreach_sqrt_1[35] + getitem_2256 = _foreach_sqrt_1[36] + getitem_2257 = _foreach_sqrt_1[37] + getitem_2258 = _foreach_sqrt_1[38] + getitem_2259 = _foreach_sqrt_1[39] + getitem_2260 = _foreach_sqrt_1[40] + getitem_2261 = _foreach_sqrt_1[41] + getitem_2262 = _foreach_sqrt_1[42] + getitem_2263 = _foreach_sqrt_1[43] + getitem_2264 = _foreach_sqrt_1[44] + getitem_2265 = _foreach_sqrt_1[45] + getitem_2266 = _foreach_sqrt_1[46] + getitem_2267 = _foreach_sqrt_1[47] + getitem_2268 = _foreach_sqrt_1[48] + getitem_2269 = _foreach_sqrt_1[49] + getitem_2270 = _foreach_sqrt_1[50] + getitem_2271 = _foreach_sqrt_1[51] + getitem_2272 = _foreach_sqrt_1[52] + getitem_2273 = _foreach_sqrt_1[53] + getitem_2274 = _foreach_sqrt_1[54] + getitem_2275 = _foreach_sqrt_1[55] + getitem_2276 = _foreach_sqrt_1[56] + getitem_2277 = _foreach_sqrt_1[57] + getitem_2278 = _foreach_sqrt_1[58] + getitem_2279 = _foreach_sqrt_1[59] + getitem_2280 = _foreach_sqrt_1[60] + getitem_2281 = _foreach_sqrt_1[61] + getitem_2282 = _foreach_sqrt_1[62] + getitem_2283 = _foreach_sqrt_1[63] + getitem_2284 = _foreach_sqrt_1[64] + getitem_2285 = _foreach_sqrt_1[65] + getitem_2286 = _foreach_sqrt_1[66] + getitem_2287 = _foreach_sqrt_1[67] + getitem_2288 = _foreach_sqrt_1[68] + getitem_2289 = _foreach_sqrt_1[69] + getitem_2290 = _foreach_sqrt_1[70] + getitem_2291 = _foreach_sqrt_1[71] + getitem_2292 = _foreach_sqrt_1[72] + getitem_2293 = _foreach_sqrt_1[73] + getitem_2294 = _foreach_sqrt_1[74] + getitem_2295 = _foreach_sqrt_1[75] + getitem_2296 = _foreach_sqrt_1[76] + getitem_2297 = _foreach_sqrt_1[77] + getitem_2298 = _foreach_sqrt_1[78] + getitem_2299 = _foreach_sqrt_1[79] + getitem_2300 = _foreach_sqrt_1[80] + getitem_2301 = _foreach_sqrt_1[81] + getitem_2302 = _foreach_sqrt_1[82] + getitem_2303 = _foreach_sqrt_1[83] + getitem_2304 = _foreach_sqrt_1[84] + getitem_2305 = _foreach_sqrt_1[85] + getitem_2306 = _foreach_sqrt_1[86] + getitem_2307 = _foreach_sqrt_1[87] + getitem_2308 = _foreach_sqrt_1[88] + getitem_2309 = _foreach_sqrt_1[89] + getitem_2310 = _foreach_sqrt_1[90] + getitem_2311 = _foreach_sqrt_1[91] + getitem_2312 = _foreach_sqrt_1[92] + getitem_2313 = _foreach_sqrt_1[93] + getitem_2314 = _foreach_sqrt_1[94] + getitem_2315 = _foreach_sqrt_1[95] + getitem_2316 = _foreach_sqrt_1[96] + getitem_2317 = 
_foreach_sqrt_1[97] + getitem_2318 = _foreach_sqrt_1[98] + getitem_2319 = _foreach_sqrt_1[99] + getitem_2320 = _foreach_sqrt_1[100] + getitem_2321 = _foreach_sqrt_1[101] + getitem_2322 = _foreach_sqrt_1[102] + getitem_2323 = _foreach_sqrt_1[103] + getitem_2324 = _foreach_sqrt_1[104] + getitem_2325 = _foreach_sqrt_1[105] + getitem_2326 = _foreach_sqrt_1[106] + getitem_2327 = _foreach_sqrt_1[107] + getitem_2328 = _foreach_sqrt_1[108] + getitem_2329 = _foreach_sqrt_1[109] + getitem_2330 = _foreach_sqrt_1[110] + getitem_2331 = _foreach_sqrt_1[111] + getitem_2332 = _foreach_sqrt_1[112] + getitem_2333 = _foreach_sqrt_1[113] + getitem_2334 = _foreach_sqrt_1[114] + getitem_2335 = _foreach_sqrt_1[115] + getitem_2336 = _foreach_sqrt_1[116] + getitem_2337 = _foreach_sqrt_1[117] + getitem_2338 = _foreach_sqrt_1[118] + getitem_2339 = _foreach_sqrt_1[119] + getitem_2340 = _foreach_sqrt_1[120] + getitem_2341 = _foreach_sqrt_1[121] + getitem_2342 = _foreach_sqrt_1[122] + getitem_2343 = _foreach_sqrt_1[123] + getitem_2344 = _foreach_sqrt_1[124] + getitem_2345 = _foreach_sqrt_1[125] + getitem_2346 = _foreach_sqrt_1[126] + getitem_2347 = _foreach_sqrt_1[127] + getitem_2348 = _foreach_sqrt_1[128] + getitem_2349 = _foreach_sqrt_1[129] + getitem_2350 = _foreach_sqrt_1[130] + getitem_2351 = _foreach_sqrt_1[131] + getitem_2352 = _foreach_sqrt_1[132] + getitem_2353 = _foreach_sqrt_1[133] + getitem_2354 = _foreach_sqrt_1[134] + getitem_2355 = _foreach_sqrt_1[135] + getitem_2356 = _foreach_sqrt_1[136] + getitem_2357 = _foreach_sqrt_1[137] + getitem_2358 = _foreach_sqrt_1[138] + getitem_2359 = _foreach_sqrt_1[139] + getitem_2360 = _foreach_sqrt_1[140] + getitem_2361 = _foreach_sqrt_1[141] + getitem_2362 = _foreach_sqrt_1[142] + getitem_2363 = _foreach_sqrt_1[143] + getitem_2364 = _foreach_sqrt_1[144] + getitem_2365 = _foreach_sqrt_1[145] + getitem_2366 = _foreach_sqrt_1[146] + getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, 
getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = 
getitem_2284 = getitem_2285 = ... = getitem_2367 = getitem_2072 = getitem_2073 = ... = getitem_2219 = None
+ getitem_2368 = _foreach_div_1[0]
+ getitem_2369 = _foreach_div_1[1]
+ ...
+ getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None
+ _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, ..., getitem_2515], 1e-08); getitem_2368 = getitem_2369 = ... = getitem_2515 = None
+ getitem_2516 = _foreach_add_3[0]
+ getitem_2517 = _foreach_add_3[1]
+ ...
+ getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None
+ _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, ..., getitem_2663], [getitem_1924, getitem_1925, ..., getitem_2071]); getitem_2516 = ... = getitem_2663 = getitem_1924 = ... = getitem_2071 = None
+ getitem_2664 = _foreach_div_2[0]
+ getitem_2665 = _foreach_div_2[1]
+ ...
+ getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None
+ _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, ..., getitem_591], [getitem_2664, getitem_2665, ..., getitem_2811]); getitem_2664 = ... = getitem_2811 = None
+ getitem_2812 = _foreach_div_3[0]
+ getitem_2813 = _foreach_div_3[1]
+ ...
+ getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None
+ _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, ..., arg147_1], [getitem_2812, getitem_2813, ..., getitem_2959]); getitem_2812 = ... = getitem_2959 = None
+ getitem_2960 = _foreach_add_4[0]
+ getitem_2961 = _foreach_add_4[1]
+ ...
+ getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None
+ copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None
+ copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None
+ ...
+ copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 =
copy__143 = None + copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None + copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None + copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None + copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None + copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None + copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None + copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None + copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None + copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None + copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None + copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None + copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None + copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None + copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None + copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None + copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None + copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None + copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None + copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None + copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None + copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None + copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None + copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None + copy__167 = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None + copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None + copy__169 = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None + copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None + copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None + copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None + copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None + copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None + copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None + copy__176 = 
torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None + copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None + copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None + copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None + copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None + copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None + copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None + copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None + copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None + copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None + copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None + copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None + copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None + copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None + copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None + copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None + copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None + copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None + copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None + copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None + copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None + copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None + copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None + copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None + copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None + copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None + copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None + copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None + copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None + copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None + copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None + copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None + copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = 
getitem_502 = copy__208 = None + copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None + copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None + copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None + copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None + copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None + copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None + copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None + copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None + copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None + copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None + copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None + copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None + copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None + copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None + copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None + copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None + copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None + copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None + copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None + copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None + copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None + copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None + copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None + copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None + copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None + copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None + copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None + copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None + copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None + copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None + copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None + copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None + copy__241 = 
torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None + copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None + copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None + copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None + copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None + copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None + copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None + copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None + copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None + copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None + copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None + copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None + copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None + copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None + copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None + copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None + copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None + copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None + copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None + copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None + copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None + copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None + copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None + copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None + copy__265 = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None + copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None + copy__267 = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None + copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None + copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None + copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None + copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None + copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None + copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = 
getitem_567 = copy__273 = None + copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None + copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None + copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None + copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None + copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None + copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None + copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None + copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None + copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None + copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None + copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None + copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None + copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None + copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None + copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None + copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None + copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None + copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None + copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None + copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None + copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None + copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None + copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None + copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None + copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None + copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None + copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None + copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None + copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None + copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None + copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None + copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None + copy__306 = 
torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None + copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None + copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None + copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None + copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None + copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None + copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None + copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None + copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None + copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None + copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None + copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None + copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None + copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None + copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None + copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None + copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None + copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None + copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None + copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None + copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None + copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None + copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None + copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None + copy__330 = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None + copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None + copy__332 = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None + copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None + copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None + copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None + copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None + copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None + copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = 
getitem_929 = copy__338 = None + copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None + copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None + copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None + copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None + copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None + copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None + copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None + copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None + copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None + copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None + copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None + copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None + copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None + copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None + copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None + copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None + copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None + copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None + copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None + copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None + copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None + copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None + copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None + copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None + copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None + copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None + copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None + copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None + copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None + copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None + copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None + copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None + copy__371 = 
torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None + copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None + copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None + copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None + copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None + copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None + copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None + copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None + copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None + copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None + copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None + copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None + copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None + copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None + copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None + copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None + copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None + copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None + copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None + copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None + copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None + copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None + copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None + copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None + copy__395 = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None + copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None + copy__397 = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None + copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None + copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None + copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None + copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None + copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None + copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = 
getitem_994 = copy__403 = None + copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None + copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None + copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None + copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None + copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None + copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None + copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None + copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None + copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None + copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None + copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None + copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None + copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None + copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None + copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None + copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None + copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None + copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None + copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None + copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None + copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None + copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None + copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None + copy__427 = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None + copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None + copy__429 = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None + copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None + copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None + copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None + copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None + copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None + copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 
= copy__435 = None + copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None + copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None + copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None + copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None + copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None + copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None + copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None + copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None + copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None + copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None + copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None + copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None + copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None + copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None + copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None + copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None + copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None + copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None + copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None + copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None + copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None + copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None + copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None + copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None + copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None + copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None + copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None + copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None + copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None + copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None + copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None + copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None + copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = 
copy__468 = None + copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None + copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None + copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None + copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None + copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None + copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None + copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None + copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None + copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None + copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None + copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None + copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None + copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None + copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None + copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None + copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None + copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None + copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None + copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None + copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None + copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None + copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None + copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None + copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None + copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None + copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None + copy__495 = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None + copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None + copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None + copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None + copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None + copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None + copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None + 
copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None + copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None + copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None + copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None + copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None + copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None + copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None + copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None + copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None + copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None + copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None + copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None + copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None + copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None + copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None + copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None + copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None + copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None + copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None + copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None + copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None + copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None + copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None + copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None + copy__526 = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None + copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None + copy__528 = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None + copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None + copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None + copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None + copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None + copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None + copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None + copy__535 = 
torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None + copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None + copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None + copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None + copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None + copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None + copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None + copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None + copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None + copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None + copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None + copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None + copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None + copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None + copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None + copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None + copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None + copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None + copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None + copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None + copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None + copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None + copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None + copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None + copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None + copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None + copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None + copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None + copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None + copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None + copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None + copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None + copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = 
copy__567 = None
+        copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None
+        copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None
+        copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None
+        copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None
+        copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None
+        copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None
+        copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None
+        copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None
+        copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None
+        copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None
+        copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None
+        copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None
+        copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None
+        copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None
+        copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None
+        copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None
+        copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None
+        copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None
+        copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None
+        copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None
+        copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None
+        copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None
+        copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None
+        copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None
+        return ()
+
+    def load_args(reader):
+        buf0 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf0, (50304, 768), is_leaf=True)  # arg0_1
+        buf1 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf1, (1024, 768), is_leaf=True)  # arg1_1
+        buf2 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf2, (768,), is_leaf=True)  # arg2_1
+        buf3 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf3, (768,), is_leaf=True)  # arg3_1
+        buf4 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf4, (2304, 768), is_leaf=True)  # arg4_1
+        buf5 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf5, (2304,), is_leaf=True)  # arg5_1
+        buf6 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf6, (768, 768), is_leaf=True)  # arg6_1
+        buf7 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf7, (768,), is_leaf=True)  # arg7_1
+        buf8 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf8, (768,), is_leaf=True)  # arg8_1
+        buf9 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf9, (768,), is_leaf=True)  # arg9_1
+        buf10 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf10, (3072, 768), is_leaf=True)  # arg10_1
+        buf11 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf11, (3072,), is_leaf=True)  # arg11_1
+        buf12 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf12, (768, 3072), is_leaf=True)  # arg12_1
+        buf13 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf13, (768,), is_leaf=True)  # arg13_1
+        buf14 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf14, (768,), is_leaf=True)  # arg14_1
+        buf15 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf15, (768,), is_leaf=True)  # arg15_1
+        buf16 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf16, (2304, 768), is_leaf=True)  # arg16_1
+        buf17 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf17, (2304,), is_leaf=True)  # arg17_1
+        buf18 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf18, (768, 768), is_leaf=True)  # arg18_1
+        buf19 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf19, (768,), is_leaf=True)  # arg19_1
+        buf20 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf20, (768,), is_leaf=True)  # arg20_1
+        buf21 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf21, (768,), is_leaf=True)  # arg21_1
+        buf22 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf22, (3072, 768), is_leaf=True)  # arg22_1
+        buf23 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf23, (3072,), is_leaf=True)  # arg23_1
+        buf24 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf24, (768, 3072), is_leaf=True)  # arg24_1
+        buf25 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf25, (768,), is_leaf=True)  # arg25_1
+        buf26 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf26, (768,), is_leaf=True)  # arg26_1
+        buf27 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf27, (768,), is_leaf=True)  # arg27_1
+        buf28 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf28, (2304, 768), is_leaf=True)  # arg28_1
+        buf29 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf29, (2304,), is_leaf=True)  # arg29_1
+        buf30 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf30, (768, 768), is_leaf=True)  # arg30_1
+        buf31 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf31, (768,), is_leaf=True)  # arg31_1
+        buf32 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf32, (768,), is_leaf=True)  # arg32_1
+        buf33 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf33, (768,), is_leaf=True)  # arg33_1
+        buf34 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf34, (3072, 768), is_leaf=True)  # arg34_1
+        buf35 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf35, (3072,), is_leaf=True)  # arg35_1
+        buf36 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf36, (768, 3072), is_leaf=True)  # arg36_1
+        buf37 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf37, (768,), is_leaf=True)  # arg37_1
+        buf38 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf38, (768,), is_leaf=True)  # arg38_1
+        buf39 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf39, (768,), is_leaf=True)  # arg39_1
+        buf40 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf40, (2304, 768), is_leaf=True)  # arg40_1
+        buf41 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf41, (2304,), is_leaf=True)  # arg41_1
+        buf42 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf42, (768, 768), is_leaf=True)  # arg42_1
+        buf43 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf43, (768,), is_leaf=True)  # arg43_1
+        buf44 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf44, (768,), is_leaf=True)  # arg44_1
+        buf45 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf45, (768,), is_leaf=True)  # arg45_1
+        buf46 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf46, (3072, 768), is_leaf=True)  # arg46_1
+        buf47 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf47, (3072,), is_leaf=True)  # arg47_1
+        buf48 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf48, (768, 3072), is_leaf=True)  # arg48_1
+        buf49 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf49, (768,), is_leaf=True)  # arg49_1
+        buf50 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf50, (768,), is_leaf=True)  # arg50_1
+        buf51 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf51, (768,), is_leaf=True)  # arg51_1
+        buf52 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf52, (2304, 768), is_leaf=True)  # arg52_1
+        buf53 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf53, (2304,), is_leaf=True)  # arg53_1
+        buf54 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf54, (768, 768), is_leaf=True)  # arg54_1
+        buf55 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf55, (768,), is_leaf=True)  # arg55_1
+        buf56 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf56, (768,), is_leaf=True)  # arg56_1
+        buf57 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf57, (768,), is_leaf=True)  # arg57_1
+        buf58 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf58, (3072, 768), is_leaf=True)  # arg58_1
+        buf59 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf59, (3072,), is_leaf=True)  # arg59_1
+        buf60 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf60, (768, 3072), is_leaf=True)  # arg60_1
+        buf61 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf61, (768,), is_leaf=True)  # arg61_1
+        buf62 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf62, (768,), is_leaf=True)  # arg62_1
+        buf63 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf63, (768,), is_leaf=True)  # arg63_1
+        buf64 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf64, (2304, 768), is_leaf=True)  # arg64_1
+        buf65 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf65, (2304,), is_leaf=True)  # arg65_1
+        buf66 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf66, (768, 768), is_leaf=True)  # arg66_1
+        buf67 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf67, (768,), is_leaf=True)  # arg67_1
+        buf68 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf68, (768,), is_leaf=True)  # arg68_1
+        buf69 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf69, (768,), is_leaf=True)  # arg69_1
+        buf70 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf70, (3072, 768), is_leaf=True)  # arg70_1
+        buf71 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf71, (3072,), is_leaf=True)  # arg71_1
+        buf72 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf72, (768, 3072), is_leaf=True)  # arg72_1
+        buf73 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf73, (768,), is_leaf=True)  # arg73_1
+        buf74 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf74, (768,), is_leaf=True)  # arg74_1
+        buf75 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf75, (768,), is_leaf=True)  # arg75_1
+        buf76 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf76, (2304, 768), is_leaf=True)  # arg76_1
+        buf77 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf77, (2304,), is_leaf=True)  # arg77_1
+        buf78 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf78, (768, 768), is_leaf=True)  # arg78_1
+        buf79 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf79, (768,), is_leaf=True)  # arg79_1
+        buf80 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf80, (768,), is_leaf=True)  # arg80_1
+        buf81 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf81, (768,), is_leaf=True)  # arg81_1
+        buf82 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf82, (3072, 768), is_leaf=True)  # arg82_1
+        buf83 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf83, (3072,), is_leaf=True)  # arg83_1
+        buf84 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf84, (768, 3072), is_leaf=True)  # arg84_1
+        buf85 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf85, (768,), is_leaf=True)  # arg85_1
+        buf86 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf86, (768,), is_leaf=True)  # arg86_1
+        buf87 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf87, (768,), is_leaf=True)  # arg87_1
+        buf88 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf88, (2304, 768), is_leaf=True)  # arg88_1
+        buf89 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf89, (2304,), is_leaf=True)  # arg89_1
+        buf90 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf90, (768, 768), is_leaf=True)  # arg90_1
+        buf91 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf91, (768,), is_leaf=True)  # arg91_1
+        buf92 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf92, (768,), is_leaf=True)  # arg92_1
+        buf93 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf93, (768,), is_leaf=True)  # arg93_1
+        buf94 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf94, (3072, 768), is_leaf=True)  # arg94_1
+        buf95 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf95, (3072,), is_leaf=True)  # arg95_1
+        buf96 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf96, (768, 3072), is_leaf=True)  # arg96_1
+        buf97 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf97, (768,), is_leaf=True)  # arg97_1
+        buf98 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf98, (768,), is_leaf=True)  # arg98_1
+        buf99 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf99, (768,), is_leaf=True)  # arg99_1
+        buf100 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf100, (2304, 768), is_leaf=True)  # arg100_1
+        buf101 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf101, (2304,), is_leaf=True)  # arg101_1
+        buf102 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf102, (768, 768), is_leaf=True)  # arg102_1
+        buf103 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf103, (768,), is_leaf=True)  # arg103_1
+        buf104 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf104, (768,), is_leaf=True)  # arg104_1
+        buf105 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf105, (768,), is_leaf=True)  # arg105_1
+        buf106 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf106, (3072, 768), is_leaf=True)  # arg106_1
+        buf107 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf107, (3072,), is_leaf=True)  # arg107_1
+        buf108 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf108, (768, 3072), is_leaf=True)  # arg108_1
+        buf109 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf109, (768,), is_leaf=True)  # arg109_1
+        buf110 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf110, (768,), is_leaf=True)  # arg110_1
+        buf111 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf111, (768,), is_leaf=True)  # arg111_1
+        buf112 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf112, (2304, 768), is_leaf=True)  # arg112_1
+        buf113 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf113, (2304,), is_leaf=True)  # arg113_1
+        buf114 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf114, (768, 768), is_leaf=True)  # arg114_1
+        buf115 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf115, (768,), is_leaf=True)  # arg115_1
+        buf116 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf116, (768,), is_leaf=True)  # arg116_1
+        buf117 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf117, (768,), is_leaf=True)  # arg117_1
+        buf118 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf118, (3072, 768), is_leaf=True)  # arg118_1
+        buf119 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf119, (3072,), is_leaf=True)  # arg119_1
+        buf120 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf120, (768, 3072), is_leaf=True)  # arg120_1
+        buf121 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf121, (768,), is_leaf=True)  # arg121_1
+        buf122 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf122, (768,), is_leaf=True)  # arg122_1
+        buf123 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf123, (768,), is_leaf=True)  # arg123_1
+        buf124 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf124, (2304, 768), is_leaf=True)  # arg124_1
+        buf125 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf125, (2304,), is_leaf=True)  # arg125_1
+        buf126 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf126, (768, 768), is_leaf=True)  # arg126_1
+        buf127 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf127, (768,), is_leaf=True)  # arg127_1
+        buf128 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf128, (768,), is_leaf=True)  # arg128_1
+        buf129 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf129, (768,), is_leaf=True)  # arg129_1
+        buf130 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf130, (3072, 768), is_leaf=True)  # arg130_1
+        buf131 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf131, (3072,), is_leaf=True)  # arg131_1
+        buf132 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf132, (768, 3072), is_leaf=True)  # arg132_1
+        buf133 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf133, (768,), is_leaf=True)  # arg133_1
+        buf134 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf134, (768,), is_leaf=True)  # arg134_1
+        buf135 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf135, (768,), is_leaf=True)  # arg135_1
+        buf136 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf136, (2304, 768), is_leaf=True)  # arg136_1
+        buf137 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf137, (2304,), is_leaf=True)  # arg137_1
+        buf138 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf138, (768, 768), is_leaf=True)  # arg138_1
+        buf139 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf139, (768,), is_leaf=True)  # arg139_1
+        buf140 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf140, (768,), is_leaf=True)  # arg140_1
+        buf141 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf141, (768,), is_leaf=True)  # arg141_1
+        buf142 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf142, (3072, 768), is_leaf=True)  # arg142_1
+        buf143 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf143, (3072,), is_leaf=True)  # arg143_1
+        buf144 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf144, (768, 3072), is_leaf=True)  # arg144_1
+        buf145 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf145, (768,), is_leaf=True)  # arg145_1
+        buf146 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf146, (768,), is_leaf=True)  # arg146_1
+        buf147 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf147, (768,), is_leaf=True)  # arg147_1
+        buf148 = reader.storage(None, 4, device=device(type='cuda', index=0))
+        reader.tensor(buf148, (), is_leaf=True)  # arg148_1
+        buf149 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf149, (1024, 768), is_leaf=True)  # arg149_1
+        buf150 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf150, (1024, 768), is_leaf=True)  # arg150_1
+        buf151 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf151, (50304, 768), is_leaf=True)  # arg151_1
+        buf152 = reader.storage(None, 3145728, device=device(type='cuda', index=0))
+        reader.tensor(buf152, (1024, 768), is_leaf=True)  # arg152_1
+        buf153 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf153, (768,), is_leaf=True)  # arg153_1
+        buf154 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf154, (768,), is_leaf=True)  # arg154_1
+        buf155 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf155, (2304, 768), is_leaf=True)  # arg155_1
+        buf156 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf156, (2304,), is_leaf=True)  # arg156_1
+        buf157 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf157, (768, 768), is_leaf=True)  # arg157_1
+        buf158 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf158, (768,), is_leaf=True)  # arg158_1
+        buf159 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf159, (768,), is_leaf=True)  # arg159_1
+        buf160 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf160, (768,), is_leaf=True)  # arg160_1
+        buf161 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf161, (3072, 768), is_leaf=True)  # arg161_1
+        buf162 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf162, (3072,), is_leaf=True)  # arg162_1
+        buf163 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf163, (768, 3072), is_leaf=True)  # arg163_1
+        buf164 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf164, (768,), is_leaf=True)  # arg164_1
+        buf165 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf165, (768,), is_leaf=True)  # arg165_1
+        buf166 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf166, (768,), is_leaf=True)  # arg166_1
+        buf167 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf167, (2304, 768), is_leaf=True)  # arg167_1
+        buf168 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf168, (2304,), is_leaf=True)  # arg168_1
+        buf169 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf169, (768, 768), is_leaf=True)  # arg169_1
+        buf170 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf170, (768,), is_leaf=True)  # arg170_1
+        buf171 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf171, (768,), is_leaf=True)  # arg171_1
+        buf172 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf172, (768,), is_leaf=True)  # arg172_1
+        buf173 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf173, (3072, 768), is_leaf=True)  # arg173_1
+        buf174 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf174, (3072,), is_leaf=True)  # arg174_1
+        buf175 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf175, (768, 3072), is_leaf=True)  # arg175_1
+        buf176 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf176, (768,), is_leaf=True)  # arg176_1
+        buf177 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf177, (768,), is_leaf=True)  # arg177_1
+        buf178 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf178, (768,), is_leaf=True)  # arg178_1
+        buf179 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf179, (2304, 768), is_leaf=True)  # arg179_1
+        buf180 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf180, (2304,), is_leaf=True)  # arg180_1
+        buf181 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf181, (768, 768), is_leaf=True)  # arg181_1
+        buf182 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf182, (768,), is_leaf=True)  # arg182_1
+        buf183 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf183, (768,), is_leaf=True)  # arg183_1
+        buf184 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf184, (768,), is_leaf=True)  # arg184_1
+        buf185 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf185, (3072, 768), is_leaf=True)  # arg185_1
+        buf186 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf186, (3072,), is_leaf=True)  # arg186_1
+        buf187 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf187, (768, 3072), is_leaf=True)  # arg187_1
+        buf188 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf188, (768,), is_leaf=True)  # arg188_1
+        buf189 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf189, (768,), is_leaf=True)  # arg189_1
+        buf190 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf190, (768,), is_leaf=True)  # arg190_1
+        buf191 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf191, (2304, 768), is_leaf=True)  # arg191_1
+        buf192 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf192, (2304,), is_leaf=True)  # arg192_1
+        buf193 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf193, (768, 768), is_leaf=True)  # arg193_1
+        buf194 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf194, (768,), is_leaf=True)  # arg194_1
+        buf195 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf195, (768,), is_leaf=True)  # arg195_1
+        buf196 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf196, (768,), is_leaf=True)  # arg196_1
+        buf197 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf197, (3072, 768), is_leaf=True)  # arg197_1
+        buf198 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf198, (3072,), is_leaf=True)  # arg198_1
+        buf199 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf199, (768, 3072), is_leaf=True)  # arg199_1
+        buf200 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf200, (768,), is_leaf=True)  # arg200_1
+        buf201 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf201, (768,), is_leaf=True)  # arg201_1
+        buf202 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf202, (768,), is_leaf=True)  # arg202_1
+        buf203 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf203, (2304, 768), is_leaf=True)  # arg203_1
+        buf204 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf204, (2304,), is_leaf=True)  # arg204_1
+        buf205 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf205, (768, 768), is_leaf=True)  # arg205_1
+        buf206 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf206, (768,), is_leaf=True)  # arg206_1
+        buf207 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf207, (768,), is_leaf=True)  # arg207_1
+        buf208 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf208, (768,), is_leaf=True)  # arg208_1
+        buf209 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf209, (3072, 768), is_leaf=True)  # arg209_1
+        buf210 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf210, (3072,), is_leaf=True)  # arg210_1
+        buf211 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf211, (768, 3072), is_leaf=True)  # arg211_1
+        buf212 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf212, (768,), is_leaf=True)  # arg212_1
+        buf213 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf213, (768,), is_leaf=True)  # arg213_1
+        buf214 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf214, (768,), is_leaf=True)  # arg214_1
+        buf215 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf215, (2304, 768), is_leaf=True)  # arg215_1
+        buf216 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf216, (2304,), is_leaf=True)  # arg216_1
+        buf217 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf217, (768, 768), is_leaf=True)  # arg217_1
+        buf218 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf218, (768,), is_leaf=True)  # arg218_1
+        buf219 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf219, (768,), is_leaf=True)  # arg219_1
+        buf220 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf220, (768,), is_leaf=True)  # arg220_1
+        buf221 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf221, (3072, 768), is_leaf=True)  # arg221_1
+        buf222 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf222, (3072,), is_leaf=True)  # arg222_1
+        buf223 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf223, (768, 3072), is_leaf=True)  # arg223_1
+        buf224 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf224, (768,), is_leaf=True)  # arg224_1
+        buf225 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf225, (768,), is_leaf=True)  # arg225_1
+        buf226 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf226, (768,), is_leaf=True)  # arg226_1
+        buf227 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf227, (2304, 768), is_leaf=True)  # arg227_1
+        buf228 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf228, (2304,), is_leaf=True)  # arg228_1
+        buf229 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf229, (768, 768), is_leaf=True)  # arg229_1
+        buf230 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf230, (768,), is_leaf=True)  # arg230_1
+        buf231 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf231, (768,), is_leaf=True)  # arg231_1
+        buf232 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf232, (768,), is_leaf=True)  # arg232_1
+        buf233 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf233, (3072, 768), is_leaf=True)  # arg233_1
+        buf234 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf234, (3072,), is_leaf=True)  # arg234_1
+        buf235 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf235, (768, 3072), is_leaf=True)  # arg235_1
+        buf236 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf236, (768,), is_leaf=True)  # arg236_1
+        buf237 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf237, (768,), is_leaf=True)  # arg237_1
+        buf238 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf238, (768,), is_leaf=True)  # arg238_1
+        buf239 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf239, (2304, 768), is_leaf=True)  # arg239_1
+        buf240 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf240, (2304,), is_leaf=True)  # arg240_1
+        buf241 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf241, (768, 768), is_leaf=True)  # arg241_1
+        buf242 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf242, (768,), is_leaf=True)  # arg242_1
+        buf243 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf243, (768,), is_leaf=True)  # arg243_1
+        buf244 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf244, (768,), is_leaf=True)  # arg244_1
+        buf245 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf245, (3072, 768), is_leaf=True)  # arg245_1
+        buf246 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf246, (3072,), is_leaf=True)  # arg246_1
+        buf247 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf247, (768, 3072), is_leaf=True)  # arg247_1
+        buf248 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf248, (768,), is_leaf=True)  # arg248_1
+        buf249 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf249, (768,), is_leaf=True)  # arg249_1
+        buf250 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf250, (768,), is_leaf=True)  # arg250_1
+        buf251 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf251, (2304, 768), is_leaf=True)  # arg251_1
+        buf252 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf252, (2304,), is_leaf=True)  # arg252_1
+        buf253 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf253, (768, 768), is_leaf=True)  # arg253_1
+        buf254 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf254, (768,), is_leaf=True)  # arg254_1
+        buf255 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf255, (768,), is_leaf=True)  # arg255_1
+        buf256 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf256, (768,), is_leaf=True)  # arg256_1
+        buf257 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf257, (3072, 768), is_leaf=True)  # arg257_1
+        buf258 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf258, (3072,), is_leaf=True)  # arg258_1
+        buf259 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf259, (768, 3072), is_leaf=True)  # arg259_1
+        buf260 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf260, (768,), is_leaf=True)  # arg260_1
+        buf261 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf261, (768,), is_leaf=True)  # arg261_1
+        buf262 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf262, (768,), is_leaf=True)  # arg262_1
+        buf263 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf263, (2304, 768), is_leaf=True)  # arg263_1
+        buf264 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf264, (2304,), is_leaf=True)  # arg264_1
+        buf265 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf265, (768, 768), is_leaf=True)  # arg265_1
+        buf266 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf266, (768,), is_leaf=True)  # arg266_1
+        buf267 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf267, (768,), is_leaf=True)  # arg267_1
+        buf268 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf268, (768,), is_leaf=True)  # arg268_1
+        buf269 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf269, (3072, 768), is_leaf=True)  # arg269_1
+        buf270 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf270, (3072,), is_leaf=True)  # arg270_1
+        buf271 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf271, (768, 3072), is_leaf=True)  # arg271_1
+        buf272 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf272, (768,), is_leaf=True)  # arg272_1
+        buf273 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf273, (768,), is_leaf=True)  # arg273_1
+        buf274 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf274, (768,), is_leaf=True)  # arg274_1
+        buf275 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf275, (2304, 768), is_leaf=True)  # arg275_1
+        buf276 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf276, (2304,), is_leaf=True)  # arg276_1
+        buf277 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf277, (768, 768), is_leaf=True)  # arg277_1
+        buf278 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf278, (768,), is_leaf=True)  # arg278_1
+        buf279 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf279, (768,), is_leaf=True)  # arg279_1
+        buf280 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf280, (768,), is_leaf=True)  # arg280_1
+        buf281 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf281, (3072, 768), is_leaf=True)  # arg281_1
+        buf282 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf282, (3072,), is_leaf=True)  # arg282_1
+        buf283 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf283, (768, 3072), is_leaf=True)  # arg283_1
+        buf284 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf284, (768,), is_leaf=True)  # arg284_1
+        buf285 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf285, (768,), is_leaf=True)  # arg285_1
+        buf286 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf286, (768,), is_leaf=True)  # arg286_1
+        buf287 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf287, (2304, 768), is_leaf=True)  # arg287_1
+        buf288 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf288, (2304,), is_leaf=True)  # arg288_1
+        buf289 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf289, (768, 768), is_leaf=True)  # arg289_1
+        buf290 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf290, (768,), is_leaf=True)  # arg290_1
+        buf291 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf291, (768,), is_leaf=True)  # arg291_1
+        buf292 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf292, (768,), is_leaf=True)  # arg292_1
+        buf293 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf293, (3072, 768), is_leaf=True)  # arg293_1
+        buf294 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf294, (3072,), is_leaf=True)  # arg294_1
+        buf295 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf295, (768, 3072), is_leaf=True)  # arg295_1
+        buf296 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf296, (768,), is_leaf=True)  # arg296_1
+        buf297 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf297, (768,), is_leaf=True)  # arg297_1
+        buf298 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf298, (768,), is_leaf=True)  # arg298_1
+        buf299 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf299, (50304, 768), is_leaf=True)  # arg299_1
+        buf300 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf300, (768,), is_leaf=True)  # arg300_1
+        buf301 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf301, (768,), is_leaf=True)  # arg301_1
+        buf302 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf302, (2304, 768), is_leaf=True)  # arg302_1
+        buf303 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf303, (2304,), is_leaf=True)  # arg303_1
+        buf304 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf304, (768, 768), is_leaf=True)  # arg304_1
+        buf305 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf305, (768,), is_leaf=True)  # arg305_1
+        buf306 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf306, (768,), is_leaf=True)  # arg306_1
+        buf307 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf307, (768,), is_leaf=True)  # arg307_1
+        buf308 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf308, (3072, 768), is_leaf=True)  # arg308_1
+        buf309 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf309, (3072,), is_leaf=True)  # arg309_1
+        buf310 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf310, (768, 3072), is_leaf=True)  # arg310_1
+        buf311 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf311, (768,), is_leaf=True)  # arg311_1
+        buf312 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf312, (768,), is_leaf=True)  # arg312_1
+        buf313 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf313, (768,), is_leaf=True)  # arg313_1
+        buf314 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf314, (2304, 768), is_leaf=True)  # arg314_1
+        buf315 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf315, (2304,), is_leaf=True)  # arg315_1
+        buf316 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf316, (768, 768), is_leaf=True)  # arg316_1
+        buf317 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf317, (768,), is_leaf=True)  # arg317_1
+        buf318 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf318, (768,), is_leaf=True)  # arg318_1
+        buf319 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf319, (768,), is_leaf=True)  # arg319_1
+        buf320 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf320, (3072, 768), is_leaf=True)  # arg320_1
+        buf321 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf321, (3072,), is_leaf=True)  # arg321_1
+        buf322 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf322, (768, 3072), is_leaf=True)  # arg322_1
+        buf323 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf323, (768,), is_leaf=True)  # arg323_1
+        buf324 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf324, (768,), is_leaf=True)  # arg324_1
+        buf325 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf325, (768,), is_leaf=True)  # arg325_1
+        buf326 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf326, (2304, 768), is_leaf=True)  # arg326_1
+        buf327 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf327, (2304,), is_leaf=True)  # arg327_1
+        buf328 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf328, (768, 768), is_leaf=True)  # arg328_1
+        buf329 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf329, (768,), is_leaf=True)  # arg329_1
+        buf330 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf330, (768,), is_leaf=True)  # arg330_1
+        buf331 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf331, (768,), is_leaf=True)  # arg331_1
+        buf332 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf332, (3072, 768), is_leaf=True)  # arg332_1
+        buf333 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf333, (3072,), is_leaf=True)  # arg333_1
+        buf334 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf334, (768, 3072), is_leaf=True)  # arg334_1
+        buf335 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf335, (768,), is_leaf=True)  # arg335_1
+        buf336 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf336, (768,), is_leaf=True)  # arg336_1
+        buf337 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf337, (768,), is_leaf=True)  # arg337_1
+        buf338 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf338, (2304, 768), is_leaf=True)  # arg338_1
+        buf339 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf339, (2304,), is_leaf=True)  # arg339_1
+        buf340 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf340, (768, 768), is_leaf=True)  # arg340_1
+        buf341 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf341, (768,), is_leaf=True)  # arg341_1
+        buf342 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf342, (768,), is_leaf=True)  # arg342_1
+        buf343 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf343, (768,), is_leaf=True)  # arg343_1
+        buf344 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf344, (3072, 768), is_leaf=True)  # arg344_1
+        buf345 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf345, (3072,), is_leaf=True)  # arg345_1
+        buf346 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf346, (768, 3072), is_leaf=True)  # arg346_1
+        buf347 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf347, (768,), is_leaf=True)  # arg347_1
+        buf348 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf348, (768,), is_leaf=True)  # arg348_1
+        buf349 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf349, (768,), is_leaf=True)  # arg349_1
+        buf350 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf350, (2304, 768), is_leaf=True)  # arg350_1
+        buf351 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf351, (2304,), is_leaf=True)  # arg351_1
+        buf352 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf352, (768, 768), is_leaf=True)  # arg352_1
+        buf353 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf353, (768,), is_leaf=True)  # arg353_1
+        buf354 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf354, (768,), is_leaf=True)  # arg354_1
+        buf355 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf355, (768,), is_leaf=True)  # arg355_1
+        buf356 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf356, (3072, 768), is_leaf=True)  # arg356_1
+        buf357 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf357, (3072,), is_leaf=True)  # arg357_1
+        buf358 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf358, (768, 3072), is_leaf=True)  # arg358_1
+        buf359 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf359, (768,), is_leaf=True)  # arg359_1
+        buf360 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf360, (768,), is_leaf=True)  # arg360_1
+        buf361 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf361, (768,), is_leaf=True)  # arg361_1
+        buf362 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf362, (2304, 768), is_leaf=True)  # arg362_1
+        buf363 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf363, (2304,), is_leaf=True)  # arg363_1
+        buf364 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf364, (768, 768), is_leaf=True)  # arg364_1
+        buf365 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf365, (768,), is_leaf=True)  # arg365_1
+        buf366 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf366, (768,), is_leaf=True)  # arg366_1
+        buf367 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf367, (768,), is_leaf=True)  # arg367_1
+        buf368 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf368, (3072, 768), is_leaf=True)  # arg368_1
+        buf369 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf369, (3072,), is_leaf=True)  # arg369_1
+        buf370 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf370, (768, 3072), is_leaf=True)  # arg370_1
+        buf371 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf371, (768,), is_leaf=True)  # arg371_1
+        buf372 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf372, (768,), is_leaf=True)  # arg372_1
+        buf373 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf373, (768,), is_leaf=True)  # arg373_1
+        buf374 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf374, (2304, 768), is_leaf=True)  # arg374_1
+        buf375 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf375, (2304,), is_leaf=True)  # arg375_1
+        buf376 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf376, (768, 768), is_leaf=True)  # arg376_1
+        buf377 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf377, (768,), is_leaf=True)  # arg377_1
+        buf378 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf378, (768,), is_leaf=True)  # arg378_1
+        buf379 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf379, (768,), is_leaf=True)  # arg379_1
+        buf380 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf380, (3072, 768), is_leaf=True)  # arg380_1
+        buf381 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf381, (3072,), is_leaf=True)  # arg381_1
+        buf382 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf382, (768, 3072), is_leaf=True)  # arg382_1
+        buf383 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf383, (768,), is_leaf=True)  # arg383_1
+        buf384 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf384, (768,), is_leaf=True)  # arg384_1
+        buf385 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf385, (768,), is_leaf=True)  # arg385_1
+        buf386 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf386, (2304, 768), is_leaf=True)  # arg386_1
+        buf387 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf387, (2304,), is_leaf=True)  # arg387_1
+        buf388 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf388, (768, 768), is_leaf=True)  # arg388_1
+        buf389 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf389, (768,), is_leaf=True)  # arg389_1
+        buf390 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf390, (768,), is_leaf=True)  # arg390_1
+        buf391 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf391, (768,), is_leaf=True)  # arg391_1
+        buf392 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf392, (3072, 768), is_leaf=True)  # arg392_1
+        buf393 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf393, (3072,), is_leaf=True)  # arg393_1
+        buf394 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf394, (768, 3072), is_leaf=True)  # arg394_1
+        buf395 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf395, (768,), is_leaf=True)  # arg395_1
+        buf396 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf396, (768,), is_leaf=True)  # arg396_1
+        buf397 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf397, (768,), is_leaf=True)  # arg397_1
+        buf398 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf398, (2304, 768), is_leaf=True)  # arg398_1
+        buf399 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf399, (2304,), is_leaf=True)  # arg399_1
+        buf400 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf400, (768, 768), is_leaf=True)  # arg400_1
+        buf401 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf401, (768,), is_leaf=True)  # arg401_1
+        buf402 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf402, (768,), is_leaf=True)  # arg402_1
+        buf403 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf403, (768,), is_leaf=True)  # arg403_1
+        buf404 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf404, (3072, 768), is_leaf=True)  # arg404_1
+        buf405 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf405, (3072,), is_leaf=True)  # arg405_1
+        buf406 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf406, (768, 3072), is_leaf=True)  # arg406_1
+        buf407 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf407, (768,), is_leaf=True)  # arg407_1
+        buf408 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf408, (768,), is_leaf=True)  # arg408_1
+        buf409 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf409, (768,), is_leaf=True)  # arg409_1
+        buf410 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf410, (2304, 768), is_leaf=True)  # arg410_1
+        buf411 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf411, (2304,), is_leaf=True)  # arg411_1
+        buf412 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf412, (768, 768), is_leaf=True)  # arg412_1
+        buf413 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf413, (768,), is_leaf=True)  # arg413_1
+        buf414 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf414, (768,), is_leaf=True)  # arg414_1
+        buf415 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf415, (768,), is_leaf=True)  # arg415_1
+        buf416 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf416, (3072, 768), is_leaf=True)  # arg416_1
+        buf417 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf417, (3072,), is_leaf=True)  # arg417_1
+        buf418 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf418, (768, 3072), is_leaf=True)  # arg418_1
+        buf419 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf419, (768,), is_leaf=True)  # arg419_1
+        buf420 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf420, (768,), is_leaf=True)  # arg420_1
+        buf421 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf421, (768,), is_leaf=True)  # arg421_1
+        buf422 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf422, (2304, 768), is_leaf=True)  # arg422_1
+        buf423 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf423, (2304,), is_leaf=True)  # arg423_1
+        buf424 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf424, (768, 768), is_leaf=True)  # arg424_1
+        buf425 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf425, (768,), is_leaf=True)  # arg425_1
+        buf426 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf426, (768,), is_leaf=True)  # arg426_1
+        buf427 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf427, (768,), is_leaf=True)  # arg427_1
+        buf428 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf428, (3072, 768), is_leaf=True)  # arg428_1
+        buf429 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf429, (3072,), is_leaf=True)  # arg429_1
+        buf430 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf430, (768, 3072), is_leaf=True)  # arg430_1
+        buf431 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf431, (768,), is_leaf=True)  # arg431_1
+        buf432 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf432, (768,), is_leaf=True)  # arg432_1
+        buf433 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf433, (768,), is_leaf=True)  # arg433_1
+        buf434 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf434, (2304, 768), is_leaf=True)  # arg434_1
+        buf435 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf435, (2304,), is_leaf=True)  # arg435_1
+        buf436 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf436, (768, 768), is_leaf=True)  # arg436_1
+        buf437 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf437, (768,), is_leaf=True)  # arg437_1
+        buf438 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf438, (768,), is_leaf=True)  # arg438_1
+        buf439 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf439, (768,), is_leaf=True)  # arg439_1
+        buf440 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf440, (3072, 768), is_leaf=True)  # arg440_1
+        buf441 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf441, (3072,), is_leaf=True)  # arg441_1
+        buf442 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf442, (768, 3072), is_leaf=True)  # arg442_1
+        buf443 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf443, (768,), is_leaf=True)  # arg443_1
+        buf444 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf444, (768,), is_leaf=True)  # arg444_1
+        buf445 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf445, (768,), is_leaf=True)  # arg445_1
+        buf446 = reader.storage(None, 154533888, device=device(type='cuda', index=0))
+        reader.tensor(buf446, (50304, 768), is_leaf=True)  # arg446_1
+        buf447 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf447, (768,), is_leaf=True)  # arg447_1
+        buf448 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf448, (768,), is_leaf=True)  # arg448_1
+        buf449 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf449, (2304, 768), is_leaf=True)  # arg449_1
+        buf450 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf450, (2304,), is_leaf=True)  # arg450_1
+        buf451 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf451, (768, 768), is_leaf=True)  # arg451_1
+        buf452 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf452, (768,), is_leaf=True)  # arg452_1
+        buf453 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf453, (768,), is_leaf=True)  # arg453_1
+        buf454 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf454, (768,), is_leaf=True)  # arg454_1
+        buf455 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf455, (3072, 768), is_leaf=True)  # arg455_1
+        buf456 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf456, (3072,), is_leaf=True)  # arg456_1
+        buf457 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf457, (768, 3072), is_leaf=True)  # arg457_1
+        buf458 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf458, (768,), is_leaf=True)  # arg458_1
+        buf459 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf459, (768,), is_leaf=True)  # arg459_1
+        buf460 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf460, (768,), is_leaf=True)  # arg460_1
+        buf461 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf461, (2304, 768), is_leaf=True)  # arg461_1
+        buf462 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf462, (2304,), is_leaf=True)  # arg462_1
+        buf463 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf463, (768, 768), is_leaf=True)  # arg463_1
+        buf464 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf464, (768,), is_leaf=True)  # arg464_1
+        buf465 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf465, (768,), is_leaf=True)  # arg465_1
+        buf466 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf466, (768,), is_leaf=True)  # arg466_1
+        buf467 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf467, (3072, 768), is_leaf=True)  # arg467_1
+        buf468 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf468, (3072,), is_leaf=True)  # arg468_1
+        buf469 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf469, (768, 3072), is_leaf=True)  # arg469_1
+        buf470 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf470, (768,), is_leaf=True)  # arg470_1
+        buf471 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf471, (768,), is_leaf=True)  # arg471_1
+        buf472 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf472, (768,), is_leaf=True)  # arg472_1
+        buf473 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf473, (2304, 768), is_leaf=True)  # arg473_1
+        buf474 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf474, (2304,), is_leaf=True)  # arg474_1
+        buf475 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf475, (768, 768), is_leaf=True)  # arg475_1
+        buf476 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf476, (768,), is_leaf=True)  # arg476_1
+        buf477 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf477, (768,), is_leaf=True)  # arg477_1
+        buf478 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf478, (768,), is_leaf=True)  # arg478_1
+        buf479 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf479, (3072, 768), is_leaf=True)  # arg479_1
+        buf480 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf480, (3072,), is_leaf=True)  # arg480_1
+        buf481 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf481, (768, 3072), is_leaf=True)  # arg481_1
+        buf482 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf482, (768,), is_leaf=True)  # arg482_1
+        buf483 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf483, (768,), is_leaf=True)  # arg483_1
+        buf484 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf484, (768,), is_leaf=True)  # arg484_1
+        buf485 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf485, (2304, 768), is_leaf=True)  # arg485_1
+        buf486 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf486, (2304,), is_leaf=True)  # arg486_1
+        buf487 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf487, (768, 768), is_leaf=True)  # arg487_1
+        buf488 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf488, (768,), is_leaf=True)  # arg488_1
+        buf489 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf489, (768,), is_leaf=True)  # arg489_1
+        buf490 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf490, (768,), is_leaf=True)  # arg490_1
+        buf491 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf491, (3072, 768), is_leaf=True)  # arg491_1
+        buf492 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf492, (3072,), is_leaf=True)  # arg492_1
+        buf493 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf493, (768, 3072), is_leaf=True)  # arg493_1
+        buf494 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf494, (768,), is_leaf=True)  # arg494_1
+        buf495 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf495, (768,), is_leaf=True)  # arg495_1
+        buf496 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf496, (768,), is_leaf=True)  # arg496_1
+        buf497 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf497, (2304, 768), is_leaf=True)  # arg497_1
+        buf498 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf498, (2304,), is_leaf=True)  # arg498_1
+        buf499 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf499, (768, 768), is_leaf=True)  # arg499_1
+        buf500 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf500, (768,), is_leaf=True)  # arg500_1
+        buf501 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf501, (768,), is_leaf=True)  # arg501_1
+        buf502 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf502, (768,), is_leaf=True)  # arg502_1
+        buf503 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf503, (3072, 768), is_leaf=True)  # arg503_1
+        buf504 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf504, (3072,), is_leaf=True)  # arg504_1
+        buf505 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf505, (768, 3072), is_leaf=True)  # arg505_1
+        buf506 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf506, (768,), is_leaf=True)  # arg506_1
+        buf507 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf507, (768,), is_leaf=True)  # arg507_1
+        buf508 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf508, (768,), is_leaf=True)  # arg508_1
+        buf509 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf509, (2304, 768), is_leaf=True)  # arg509_1
+        buf510 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf510, (2304,), is_leaf=True)  # arg510_1
+        buf511 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf511, (768, 768), is_leaf=True)  # arg511_1
+        buf512 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf512, (768,), is_leaf=True)  # arg512_1
+        buf513 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf513, (768,), is_leaf=True)  # arg513_1
+        buf514 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf514, (768,), is_leaf=True)  # arg514_1
+        buf515 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf515, (3072, 768), is_leaf=True)  # arg515_1
+        buf516 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf516, (3072,), is_leaf=True)  # arg516_1
+        buf517 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf517, (768, 3072), is_leaf=True)  # arg517_1
+        buf518 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf518, (768,), is_leaf=True)  # arg518_1
+        buf519 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf519, (768,), is_leaf=True)  # arg519_1
+        buf520 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf520, (768,), is_leaf=True)  # arg520_1
+        buf521 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf521, (2304, 768), is_leaf=True)  # arg521_1
+        buf522 = reader.storage(None, 9216, device=device(type='cuda', index=0))
+        reader.tensor(buf522, (2304,), is_leaf=True)  # arg522_1
+        buf523 = reader.storage(None, 2359296, device=device(type='cuda', index=0))
+        reader.tensor(buf523, (768, 768), is_leaf=True)  # arg523_1
+        buf524 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf524, (768,), is_leaf=True)  # arg524_1
+        buf525 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf525, (768,), is_leaf=True)  # arg525_1
+        buf526 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf526, (768,), is_leaf=True)  # arg526_1
+        buf527 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf527, (3072, 768), is_leaf=True)  # arg527_1
+        buf528 = reader.storage(None, 12288, device=device(type='cuda', index=0))
+        reader.tensor(buf528, (3072,), is_leaf=True)  # arg528_1
+        buf529 = reader.storage(None, 9437184, device=device(type='cuda', index=0))
+        reader.tensor(buf529, (768, 3072), is_leaf=True)  # arg529_1
+        buf530 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf530, (768,), is_leaf=True)  # arg530_1
+        buf531 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf531, (768,), is_leaf=True)  # arg531_1
+        buf532 = reader.storage(None, 3072, device=device(type='cuda', index=0))
+        reader.tensor(buf532, (768,), is_leaf=True)  # arg532_1
+        buf533 = reader.storage(None, 7077888, device=device(type='cuda', index=0))
+        reader.tensor(buf533, (2304, 768), is_leaf=True)  # arg533_1
+        buf534 = reader.storage(None, 9216,
device=device(type='cuda', index=0)) + reader.tensor(buf534, (2304,), is_leaf=True) # arg534_1 + buf535 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf535, (768, 768), is_leaf=True) # arg535_1 + buf536 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf536, (768,), is_leaf=True) # arg536_1 + buf537 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf537, (768,), is_leaf=True) # arg537_1 + buf538 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf538, (768,), is_leaf=True) # arg538_1 + buf539 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf539, (3072, 768), is_leaf=True) # arg539_1 + buf540 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf540, (3072,), is_leaf=True) # arg540_1 + buf541 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf541, (768, 3072), is_leaf=True) # arg541_1 + buf542 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf542, (768,), is_leaf=True) # arg542_1 + buf543 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf543, (768,), is_leaf=True) # arg543_1 + buf544 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf544, (768,), is_leaf=True) # arg544_1 + buf545 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf545, (2304, 768), is_leaf=True) # arg545_1 + buf546 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf546, (2304,), is_leaf=True) # arg546_1 + buf547 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf547, (768, 768), is_leaf=True) # arg547_1 + buf548 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf548, (768,), is_leaf=True) # arg548_1 + buf549 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf549, (768,), is_leaf=True) # arg549_1 + buf550 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf550, (768,), is_leaf=True) # arg550_1 + buf551 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf551, (3072, 768), is_leaf=True) # arg551_1 + buf552 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf552, (3072,), is_leaf=True) # arg552_1 + buf553 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf553, (768, 3072), is_leaf=True) # arg553_1 + buf554 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf554, (768,), is_leaf=True) # arg554_1 + buf555 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf555, (768,), is_leaf=True) # arg555_1 + buf556 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf556, (768,), is_leaf=True) # arg556_1 + buf557 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf557, (2304, 768), is_leaf=True) # arg557_1 + buf558 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf558, (2304,), is_leaf=True) # arg558_1 + buf559 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf559, (768, 768), is_leaf=True) # arg559_1 + buf560 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + 
reader.tensor(buf560, (768,), is_leaf=True) # arg560_1 + buf561 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf561, (768,), is_leaf=True) # arg561_1 + buf562 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf562, (768,), is_leaf=True) # arg562_1 + buf563 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf563, (3072, 768), is_leaf=True) # arg563_1 + buf564 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf564, (3072,), is_leaf=True) # arg564_1 + buf565 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf565, (768, 3072), is_leaf=True) # arg565_1 + buf566 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf566, (768,), is_leaf=True) # arg566_1 + buf567 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf567, (768,), is_leaf=True) # arg567_1 + buf568 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf568, (768,), is_leaf=True) # arg568_1 + buf569 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf569, (2304, 768), is_leaf=True) # arg569_1 + buf570 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf570, (2304,), is_leaf=True) # arg570_1 + buf571 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf571, (768, 768), is_leaf=True) # arg571_1 + buf572 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf572, (768,), is_leaf=True) # arg572_1 + buf573 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf573, (768,), is_leaf=True) # arg573_1 + buf574 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf574, (768,), is_leaf=True) # arg574_1 + buf575 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf575, (3072, 768), is_leaf=True) # arg575_1 + buf576 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf576, (3072,), is_leaf=True) # arg576_1 + buf577 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf577, (768, 3072), is_leaf=True) # arg577_1 + buf578 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf578, (768,), is_leaf=True) # arg578_1 + buf579 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf579, (768,), is_leaf=True) # arg579_1 + buf580 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf580, (768,), is_leaf=True) # arg580_1 + buf581 = reader.storage(None, 7077888, device=device(type='cuda', index=0)) + reader.tensor(buf581, (2304, 768), is_leaf=True) # arg581_1 + buf582 = reader.storage(None, 9216, device=device(type='cuda', index=0)) + reader.tensor(buf582, (2304,), is_leaf=True) # arg582_1 + buf583 = reader.storage(None, 2359296, device=device(type='cuda', index=0)) + reader.tensor(buf583, (768, 768), is_leaf=True) # arg583_1 + buf584 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf584, (768,), is_leaf=True) # arg584_1 + buf585 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf585, (768,), is_leaf=True) # arg585_1 + buf586 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf586, (768,), is_leaf=True) # arg586_1 
+ buf587 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf587, (3072, 768), is_leaf=True) # arg587_1 + buf588 = reader.storage(None, 12288, device=device(type='cuda', index=0)) + reader.tensor(buf588, (3072,), is_leaf=True) # arg588_1 + buf589 = reader.storage(None, 9437184, device=device(type='cuda', index=0)) + reader.tensor(buf589, (768, 3072), is_leaf=True) # arg589_1 + buf590 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf590, (768,), is_leaf=True) # arg590_1 + buf591 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf591, (768,), is_leaf=True) # arg591_1 + buf592 = reader.storage(None, 3072, device=device(type='cuda', index=0)) + reader.tensor(buf592, (768,), is_leaf=True) # arg592_1 + buf593 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf593, (), is_leaf=True) # arg593_1 + buf594 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf594, (), is_leaf=True) # arg594_1 + buf595 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf595, (), is_leaf=True) # arg595_1 + buf596 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf596, (), is_leaf=True) # arg596_1 + buf597 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf597, (), is_leaf=True) # arg597_1 + buf598 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf598, (), is_leaf=True) # arg598_1 + buf599 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf599, (), is_leaf=True) # arg599_1 + buf600 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf600, (), is_leaf=True) # arg600_1 + buf601 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf601, (), is_leaf=True) # arg601_1 + buf602 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf602, (), is_leaf=True) # arg602_1 + buf603 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf603, (), is_leaf=True) # arg603_1 + buf604 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf604, (), is_leaf=True) # arg604_1 + buf605 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf605, (), is_leaf=True) # arg605_1 + buf606 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf606, (), is_leaf=True) # arg606_1 + buf607 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf607, (), is_leaf=True) # arg607_1 + buf608 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf608, (), is_leaf=True) # arg608_1 + buf609 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf609, (), is_leaf=True) # arg609_1 + buf610 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf610, (), is_leaf=True) # arg610_1 + buf611 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf611, (), is_leaf=True) # arg611_1 + buf612 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf612, (), is_leaf=True) # arg612_1 + buf613 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf613, (), is_leaf=True) # arg613_1 + buf614 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf614, (), is_leaf=True) # 
arg614_1 + buf615 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf615, (), is_leaf=True) # arg615_1 + buf616 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf616, (), is_leaf=True) # arg616_1 + buf617 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf617, (), is_leaf=True) # arg617_1 + buf618 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf618, (), is_leaf=True) # arg618_1 + buf619 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf619, (), is_leaf=True) # arg619_1 + buf620 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf620, (), is_leaf=True) # arg620_1 + buf621 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf621, (), is_leaf=True) # arg621_1 + buf622 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf622, (), is_leaf=True) # arg622_1 + buf623 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf623, (), is_leaf=True) # arg623_1 + buf624 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf624, (), is_leaf=True) # arg624_1 + buf625 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf625, (), is_leaf=True) # arg625_1 + buf626 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf626, (), is_leaf=True) # arg626_1 + buf627 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf627, (), is_leaf=True) # arg627_1 + buf628 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf628, (), is_leaf=True) # arg628_1 + buf629 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf629, (), is_leaf=True) # arg629_1 + buf630 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf630, (), is_leaf=True) # arg630_1 + buf631 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf631, (), is_leaf=True) # arg631_1 + buf632 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf632, (), is_leaf=True) # arg632_1 + buf633 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf633, (), is_leaf=True) # arg633_1 + buf634 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf634, (), is_leaf=True) # arg634_1 + buf635 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf635, (), is_leaf=True) # arg635_1 + buf636 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf636, (), is_leaf=True) # arg636_1 + buf637 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf637, (), is_leaf=True) # arg637_1 + buf638 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf638, (), is_leaf=True) # arg638_1 + buf639 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf639, (), is_leaf=True) # arg639_1 + buf640 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf640, (), is_leaf=True) # arg640_1 + buf641 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf641, (), is_leaf=True) # arg641_1 + buf642 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf642, (), is_leaf=True) # arg642_1 + buf643 = reader.storage(None, 4, 
device=device(type='cuda', index=0)) + reader.tensor(buf643, (), is_leaf=True) # arg643_1 + buf644 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf644, (), is_leaf=True) # arg644_1 + buf645 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf645, (), is_leaf=True) # arg645_1 + buf646 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf646, (), is_leaf=True) # arg646_1 + buf647 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf647, (), is_leaf=True) # arg647_1 + buf648 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf648, (), is_leaf=True) # arg648_1 + buf649 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf649, (), is_leaf=True) # arg649_1 + buf650 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf650, (), is_leaf=True) # arg650_1 + buf651 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf651, (), is_leaf=True) # arg651_1 + buf652 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf652, (), is_leaf=True) # arg652_1 + buf653 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf653, (), is_leaf=True) # arg653_1 + buf654 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf654, (), is_leaf=True) # arg654_1 + buf655 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf655, (), is_leaf=True) # arg655_1 + buf656 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf656, (), is_leaf=True) # arg656_1 + buf657 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf657, (), is_leaf=True) # arg657_1 + buf658 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf658, (), is_leaf=True) # arg658_1 + buf659 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf659, (), is_leaf=True) # arg659_1 + buf660 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf660, (), is_leaf=True) # arg660_1 + buf661 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf661, (), is_leaf=True) # arg661_1 + buf662 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf662, (), is_leaf=True) # arg662_1 + buf663 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf663, (), is_leaf=True) # arg663_1 + buf664 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf664, (), is_leaf=True) # arg664_1 + buf665 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf665, (), is_leaf=True) # arg665_1 + buf666 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf666, (), is_leaf=True) # arg666_1 + buf667 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf667, (), is_leaf=True) # arg667_1 + buf668 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf668, (), is_leaf=True) # arg668_1 + buf669 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf669, (), is_leaf=True) # arg669_1 + buf670 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf670, (), is_leaf=True) # arg670_1 + buf671 = reader.storage(None, 4, device=device(type='cuda', index=0)) + 
reader.tensor(buf671, (), is_leaf=True) # arg671_1 + buf672 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf672, (), is_leaf=True) # arg672_1 + buf673 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf673, (), is_leaf=True) # arg673_1 + buf674 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf674, (), is_leaf=True) # arg674_1 + buf675 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf675, (), is_leaf=True) # arg675_1 + buf676 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf676, (), is_leaf=True) # arg676_1 + buf677 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf677, (), is_leaf=True) # arg677_1 + buf678 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf678, (), is_leaf=True) # arg678_1 + buf679 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf679, (), is_leaf=True) # arg679_1 + buf680 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf680, (), is_leaf=True) # arg680_1 + buf681 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf681, (), is_leaf=True) # arg681_1 + buf682 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf682, (), is_leaf=True) # arg682_1 + buf683 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf683, (), is_leaf=True) # arg683_1 + buf684 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf684, (), is_leaf=True) # arg684_1 + buf685 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf685, (), is_leaf=True) # arg685_1 + buf686 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf686, (), is_leaf=True) # arg686_1 + buf687 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf687, (), is_leaf=True) # arg687_1 + buf688 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf688, (), is_leaf=True) # arg688_1 + buf689 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf689, (), is_leaf=True) # arg689_1 + buf690 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf690, (), is_leaf=True) # arg690_1 + buf691 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf691, (), is_leaf=True) # arg691_1 + buf692 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf692, (), is_leaf=True) # arg692_1 + buf693 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf693, (), is_leaf=True) # arg693_1 + buf694 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf694, (), is_leaf=True) # arg694_1 + buf695 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf695, (), is_leaf=True) # arg695_1 + buf696 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf696, (), is_leaf=True) # arg696_1 + buf697 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf697, (), is_leaf=True) # arg697_1 + buf698 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf698, (), is_leaf=True) # arg698_1 + buf699 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf699, (), is_leaf=True) # arg699_1 + 
buf700 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf700, (), is_leaf=True) # arg700_1 + buf701 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf701, (), is_leaf=True) # arg701_1 + buf702 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf702, (), is_leaf=True) # arg702_1 + buf703 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf703, (), is_leaf=True) # arg703_1 + buf704 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf704, (), is_leaf=True) # arg704_1 + buf705 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf705, (), is_leaf=True) # arg705_1 + buf706 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf706, (), is_leaf=True) # arg706_1 + buf707 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf707, (), is_leaf=True) # arg707_1 + buf708 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf708, (), is_leaf=True) # arg708_1 + buf709 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf709, (), is_leaf=True) # arg709_1 + buf710 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf710, (), is_leaf=True) # arg710_1 + buf711 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf711, (), is_leaf=True) # arg711_1 + buf712 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf712, (), is_leaf=True) # arg712_1 + buf713 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf713, (), is_leaf=True) # arg713_1 + buf714 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf714, (), is_leaf=True) # arg714_1 + buf715 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf715, (), is_leaf=True) # arg715_1 + buf716 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf716, (), is_leaf=True) # arg716_1 + buf717 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf717, (), is_leaf=True) # arg717_1 + buf718 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf718, (), is_leaf=True) # arg718_1 + buf719 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf719, (), is_leaf=True) # arg719_1 + buf720 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf720, (), is_leaf=True) # arg720_1 + buf721 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf721, (), is_leaf=True) # arg721_1 + buf722 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf722, (), is_leaf=True) # arg722_1 + buf723 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf723, (), is_leaf=True) # arg723_1 + buf724 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf724, (), is_leaf=True) # arg724_1 + buf725 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf725, (), is_leaf=True) # arg725_1 + buf726 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf726, (), is_leaf=True) # arg726_1 + buf727 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf727, (), is_leaf=True) # arg727_1 + buf728 = reader.storage(None, 4, 
device=device(type='cuda', index=0)) + reader.tensor(buf728, (), is_leaf=True) # arg728_1 + buf729 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf729, (), is_leaf=True) # arg729_1 + buf730 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf730, (), is_leaf=True) # arg730_1 + buf731 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf731, (), is_leaf=True) # arg731_1 + buf732 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf732, (), is_leaf=True) # arg732_1 + buf733 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf733, (), is_leaf=True) # arg733_1 + buf734 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf734, (), is_leaf=True) # arg734_1 + buf735 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf735, (), is_leaf=True) # arg735_1 + buf736 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf736, (), is_leaf=True) # arg736_1 + buf737 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf737, (), is_leaf=True) # arg737_1 + buf738 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf738, (), is_leaf=True) # arg738_1 + buf739 = reader.storage(None, 4, device=device(type='cuda', index=0)) + reader.tensor(buf739, (), is_leaf=True) # arg739_1 + load_args._version = 0 + mod = Repro() + if __name__ == '__main__': + from torch._dynamo.repro.after_aot import run_repro + with torch.no_grad(): + run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None) + # To run it separately, do + # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None) + # mod(*args) +V0806 13:56:11.466000 4107173 torch/_inductor/compile_fx.py:778] {"inductor_post_grad_graph": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "1871900304899d884f7ee96375f19aa0"} + class (torch.nn.Module): + def forward(self, arg0_1: "f32[50304, 768][768, 1]cuda:0", arg1_1: "f32[1024, 768][768, 1]cuda:0", arg2_1: "f32[768][1]cuda:0", arg3_1: "f32[768][1]cuda:0", arg4_1: "f32[2304, 768][768, 1]cuda:0", arg5_1: "f32[2304][1]cuda:0", arg6_1: "f32[768, 768][768, 1]cuda:0", arg7_1: "f32[768][1]cuda:0", arg8_1: "f32[768][1]cuda:0", arg9_1: "f32[768][1]cuda:0", arg10_1: "f32[3072, 768][768, 1]cuda:0", arg11_1: "f32[3072][1]cuda:0", arg12_1: "f32[768, 3072][3072, 1]cuda:0", arg13_1: "f32[768][1]cuda:0", arg14_1: "f32[768][1]cuda:0", arg15_1: "f32[768][1]cuda:0", arg16_1: "f32[2304, 768][768, 1]cuda:0", arg17_1: "f32[2304][1]cuda:0", arg18_1: "f32[768, 768][768, 1]cuda:0", arg19_1: "f32[768][1]cuda:0", arg20_1: "f32[768][1]cuda:0", arg21_1: "f32[768][1]cuda:0", arg22_1: "f32[3072, 768][768, 1]cuda:0", arg23_1: "f32[3072][1]cuda:0", arg24_1: "f32[768, 3072][3072, 1]cuda:0", arg25_1: "f32[768][1]cuda:0", arg26_1: "f32[768][1]cuda:0", arg27_1: "f32[768][1]cuda:0", arg28_1: "f32[2304, 768][768, 1]cuda:0", arg29_1: "f32[2304][1]cuda:0", arg30_1: "f32[768, 768][768, 1]cuda:0", arg31_1: "f32[768][1]cuda:0", arg32_1: "f32[768][1]cuda:0", arg33_1: "f32[768][1]cuda:0", arg34_1: "f32[3072, 768][768, 1]cuda:0", arg35_1: "f32[3072][1]cuda:0", arg36_1: "f32[768, 3072][3072, 1]cuda:0", arg37_1: "f32[768][1]cuda:0", arg38_1: "f32[768][1]cuda:0", arg39_1: "f32[768][1]cuda:0", arg40_1: "f32[2304, 768][768, 1]cuda:0", arg41_1: 
"f32[2304][1]cuda:0", arg42_1: "f32[768, 768][768, 1]cuda:0", arg43_1: "f32[768][1]cuda:0", arg44_1: "f32[768][1]cuda:0", arg45_1: "f32[768][1]cuda:0", arg46_1: "f32[3072, 768][768, 1]cuda:0", arg47_1: "f32[3072][1]cuda:0", arg48_1: "f32[768, 3072][3072, 1]cuda:0", arg49_1: "f32[768][1]cuda:0", arg50_1: "f32[768][1]cuda:0", arg51_1: "f32[768][1]cuda:0", arg52_1: "f32[2304, 768][768, 1]cuda:0", arg53_1: "f32[2304][1]cuda:0", arg54_1: "f32[768, 768][768, 1]cuda:0", arg55_1: "f32[768][1]cuda:0", arg56_1: "f32[768][1]cuda:0", arg57_1: "f32[768][1]cuda:0", arg58_1: "f32[3072, 768][768, 1]cuda:0", arg59_1: "f32[3072][1]cuda:0", arg60_1: "f32[768, 3072][3072, 1]cuda:0", arg61_1: "f32[768][1]cuda:0", arg62_1: "f32[768][1]cuda:0", arg63_1: "f32[768][1]cuda:0", arg64_1: "f32[2304, 768][768, 1]cuda:0", arg65_1: "f32[2304][1]cuda:0", arg66_1: "f32[768, 768][768, 1]cuda:0", arg67_1: "f32[768][1]cuda:0", arg68_1: "f32[768][1]cuda:0", arg69_1: "f32[768][1]cuda:0", arg70_1: "f32[3072, 768][768, 1]cuda:0", arg71_1: "f32[3072][1]cuda:0", arg72_1: "f32[768, 3072][3072, 1]cuda:0", arg73_1: "f32[768][1]cuda:0", arg74_1: "f32[768][1]cuda:0", arg75_1: "f32[768][1]cuda:0", arg76_1: "f32[2304, 768][768, 1]cuda:0", arg77_1: "f32[2304][1]cuda:0", arg78_1: "f32[768, 768][768, 1]cuda:0", arg79_1: "f32[768][1]cuda:0", arg80_1: "f32[768][1]cuda:0", arg81_1: "f32[768][1]cuda:0", arg82_1: "f32[3072, 768][768, 1]cuda:0", arg83_1: "f32[3072][1]cuda:0", arg84_1: "f32[768, 3072][3072, 1]cuda:0", arg85_1: "f32[768][1]cuda:0", arg86_1: "f32[768][1]cuda:0", arg87_1: "f32[768][1]cuda:0", arg88_1: "f32[2304, 768][768, 1]cuda:0", arg89_1: "f32[2304][1]cuda:0", arg90_1: "f32[768, 768][768, 1]cuda:0", arg91_1: "f32[768][1]cuda:0", arg92_1: "f32[768][1]cuda:0", arg93_1: "f32[768][1]cuda:0", arg94_1: "f32[3072, 768][768, 1]cuda:0", arg95_1: "f32[3072][1]cuda:0", arg96_1: "f32[768, 3072][3072, 1]cuda:0", arg97_1: "f32[768][1]cuda:0", arg98_1: "f32[768][1]cuda:0", arg99_1: "f32[768][1]cuda:0", arg100_1: "f32[2304, 768][768, 1]cuda:0", arg101_1: "f32[2304][1]cuda:0", arg102_1: "f32[768, 768][768, 1]cuda:0", arg103_1: "f32[768][1]cuda:0", arg104_1: "f32[768][1]cuda:0", arg105_1: "f32[768][1]cuda:0", arg106_1: "f32[3072, 768][768, 1]cuda:0", arg107_1: "f32[3072][1]cuda:0", arg108_1: "f32[768, 3072][3072, 1]cuda:0", arg109_1: "f32[768][1]cuda:0", arg110_1: "f32[768][1]cuda:0", arg111_1: "f32[768][1]cuda:0", arg112_1: "f32[2304, 768][768, 1]cuda:0", arg113_1: "f32[2304][1]cuda:0", arg114_1: "f32[768, 768][768, 1]cuda:0", arg115_1: "f32[768][1]cuda:0", arg116_1: "f32[768][1]cuda:0", arg117_1: "f32[768][1]cuda:0", arg118_1: "f32[3072, 768][768, 1]cuda:0", arg119_1: "f32[3072][1]cuda:0", arg120_1: "f32[768, 3072][3072, 1]cuda:0", arg121_1: "f32[768][1]cuda:0", arg122_1: "f32[768][1]cuda:0", arg123_1: "f32[768][1]cuda:0", arg124_1: "f32[2304, 768][768, 1]cuda:0", arg125_1: "f32[2304][1]cuda:0", arg126_1: "f32[768, 768][768, 1]cuda:0", arg127_1: "f32[768][1]cuda:0", arg128_1: "f32[768][1]cuda:0", arg129_1: "f32[768][1]cuda:0", arg130_1: "f32[3072, 768][768, 1]cuda:0", arg131_1: "f32[3072][1]cuda:0", arg132_1: "f32[768, 3072][3072, 1]cuda:0", arg133_1: "f32[768][1]cuda:0", arg134_1: "f32[768][1]cuda:0", arg135_1: "f32[768][1]cuda:0", arg136_1: "f32[2304, 768][768, 1]cuda:0", arg137_1: "f32[2304][1]cuda:0", arg138_1: "f32[768, 768][768, 1]cuda:0", arg139_1: "f32[768][1]cuda:0", arg140_1: "f32[768][1]cuda:0", arg141_1: "f32[768][1]cuda:0", arg142_1: "f32[3072, 768][768, 1]cuda:0", arg143_1: "f32[3072][1]cuda:0", arg144_1: "f32[768, 3072][3072, 
1]cuda:0", arg145_1: "f32[768][1]cuda:0", arg146_1: "f32[768][1]cuda:0", arg147_1: "f32[768][1]cuda:0", arg148_1: "f32[][]cuda:0", arg149_1: "f32[1024, 768][768, 1]cuda:0", arg150_1: "f32[1024, 768][768, 1]cuda:0", arg151_1: "f32[50304, 768][768, 1]cuda:0", arg152_1: "f32[1024, 768][768, 1]cuda:0", arg153_1: "f32[768][1]cuda:0", arg154_1: "f32[768][1]cuda:0", arg155_1: "f32[2304, 768][768, 1]cuda:0", arg156_1: "f32[2304][1]cuda:0", arg157_1: "f32[768, 768][768, 1]cuda:0", arg158_1: "f32[768][1]cuda:0", arg159_1: "f32[768][1]cuda:0", arg160_1: "f32[768][1]cuda:0", arg161_1: "f32[3072, 768][768, 1]cuda:0", arg162_1: "f32[3072][1]cuda:0", arg163_1: "f32[768, 3072][3072, 1]cuda:0", arg164_1: "f32[768][1]cuda:0", arg165_1: "f32[768][1]cuda:0", arg166_1: "f32[768][1]cuda:0", arg167_1: "f32[2304, 768][768, 1]cuda:0", arg168_1: "f32[2304][1]cuda:0", arg169_1: "f32[768, 768][768, 1]cuda:0", arg170_1: "f32[768][1]cuda:0", arg171_1: "f32[768][1]cuda:0", arg172_1: "f32[768][1]cuda:0", arg173_1: "f32[3072, 768][768, 1]cuda:0", arg174_1: "f32[3072][1]cuda:0", arg175_1: "f32[768, 3072][3072, 1]cuda:0", arg176_1: "f32[768][1]cuda:0", arg177_1: "f32[768][1]cuda:0", arg178_1: "f32[768][1]cuda:0", arg179_1: "f32[2304, 768][768, 1]cuda:0", arg180_1: "f32[2304][1]cuda:0", arg181_1: "f32[768, 768][768, 1]cuda:0", arg182_1: "f32[768][1]cuda:0", arg183_1: "f32[768][1]cuda:0", arg184_1: "f32[768][1]cuda:0", arg185_1: "f32[3072, 768][768, 1]cuda:0", arg186_1: "f32[3072][1]cuda:0", arg187_1: "f32[768, 3072][3072, 1]cuda:0", arg188_1: "f32[768][1]cuda:0", arg189_1: "f32[768][1]cuda:0", arg190_1: "f32[768][1]cuda:0", arg191_1: "f32[2304, 768][768, 1]cuda:0", arg192_1: "f32[2304][1]cuda:0", arg193_1: "f32[768, 768][768, 1]cuda:0", arg194_1: "f32[768][1]cuda:0", arg195_1: "f32[768][1]cuda:0", arg196_1: "f32[768][1]cuda:0", arg197_1: "f32[3072, 768][768, 1]cuda:0", arg198_1: "f32[3072][1]cuda:0", arg199_1: "f32[768, 3072][3072, 1]cuda:0", arg200_1: "f32[768][1]cuda:0", arg201_1: "f32[768][1]cuda:0", arg202_1: "f32[768][1]cuda:0", arg203_1: "f32[2304, 768][768, 1]cuda:0", arg204_1: "f32[2304][1]cuda:0", arg205_1: "f32[768, 768][768, 1]cuda:0", arg206_1: "f32[768][1]cuda:0", arg207_1: "f32[768][1]cuda:0", arg208_1: "f32[768][1]cuda:0", arg209_1: "f32[3072, 768][768, 1]cuda:0", arg210_1: "f32[3072][1]cuda:0", arg211_1: "f32[768, 3072][3072, 1]cuda:0", arg212_1: "f32[768][1]cuda:0", arg213_1: "f32[768][1]cuda:0", arg214_1: "f32[768][1]cuda:0", arg215_1: "f32[2304, 768][768, 1]cuda:0", arg216_1: "f32[2304][1]cuda:0", arg217_1: "f32[768, 768][768, 1]cuda:0", arg218_1: "f32[768][1]cuda:0", arg219_1: "f32[768][1]cuda:0", arg220_1: "f32[768][1]cuda:0", arg221_1: "f32[3072, 768][768, 1]cuda:0", arg222_1: "f32[3072][1]cuda:0", arg223_1: "f32[768, 3072][3072, 1]cuda:0", arg224_1: "f32[768][1]cuda:0", arg225_1: "f32[768][1]cuda:0", arg226_1: "f32[768][1]cuda:0", arg227_1: "f32[2304, 768][768, 1]cuda:0", arg228_1: "f32[2304][1]cuda:0", arg229_1: "f32[768, 768][768, 1]cuda:0", arg230_1: "f32[768][1]cuda:0", arg231_1: "f32[768][1]cuda:0", arg232_1: "f32[768][1]cuda:0", arg233_1: "f32[3072, 768][768, 1]cuda:0", arg234_1: "f32[3072][1]cuda:0", arg235_1: "f32[768, 3072][3072, 1]cuda:0", arg236_1: "f32[768][1]cuda:0", arg237_1: "f32[768][1]cuda:0", arg238_1: "f32[768][1]cuda:0", arg239_1: "f32[2304, 768][768, 1]cuda:0", arg240_1: "f32[2304][1]cuda:0", arg241_1: "f32[768, 768][768, 1]cuda:0", arg242_1: "f32[768][1]cuda:0", arg243_1: "f32[768][1]cuda:0", arg244_1: "f32[768][1]cuda:0", arg245_1: "f32[3072, 768][768, 1]cuda:0", arg246_1: 
"f32[3072][1]cuda:0", arg247_1: "f32[768, 3072][3072, 1]cuda:0", arg248_1: "f32[768][1]cuda:0", arg249_1: "f32[768][1]cuda:0", arg250_1: "f32[768][1]cuda:0", arg251_1: "f32[2304, 768][768, 1]cuda:0", arg252_1: "f32[2304][1]cuda:0", arg253_1: "f32[768, 768][768, 1]cuda:0", arg254_1: "f32[768][1]cuda:0", arg255_1: "f32[768][1]cuda:0", arg256_1: "f32[768][1]cuda:0", arg257_1: "f32[3072, 768][768, 1]cuda:0", arg258_1: "f32[3072][1]cuda:0", arg259_1: "f32[768, 3072][3072, 1]cuda:0", arg260_1: "f32[768][1]cuda:0", arg261_1: "f32[768][1]cuda:0", arg262_1: "f32[768][1]cuda:0", arg263_1: "f32[2304, 768][768, 1]cuda:0", arg264_1: "f32[2304][1]cuda:0", arg265_1: "f32[768, 768][768, 1]cuda:0", arg266_1: "f32[768][1]cuda:0", arg267_1: "f32[768][1]cuda:0", arg268_1: "f32[768][1]cuda:0", arg269_1: "f32[3072, 768][768, 1]cuda:0", arg270_1: "f32[3072][1]cuda:0", arg271_1: "f32[768, 3072][3072, 1]cuda:0", arg272_1: "f32[768][1]cuda:0", arg273_1: "f32[768][1]cuda:0", arg274_1: "f32[768][1]cuda:0", arg275_1: "f32[2304, 768][768, 1]cuda:0", arg276_1: "f32[2304][1]cuda:0", arg277_1: "f32[768, 768][768, 1]cuda:0", arg278_1: "f32[768][1]cuda:0", arg279_1: "f32[768][1]cuda:0", arg280_1: "f32[768][1]cuda:0", arg281_1: "f32[3072, 768][768, 1]cuda:0", arg282_1: "f32[3072][1]cuda:0", arg283_1: "f32[768, 3072][3072, 1]cuda:0", arg284_1: "f32[768][1]cuda:0", arg285_1: "f32[768][1]cuda:0", arg286_1: "f32[768][1]cuda:0", arg287_1: "f32[2304, 768][768, 1]cuda:0", arg288_1: "f32[2304][1]cuda:0", arg289_1: "f32[768, 768][768, 1]cuda:0", arg290_1: "f32[768][1]cuda:0", arg291_1: "f32[768][1]cuda:0", arg292_1: "f32[768][1]cuda:0", arg293_1: "f32[3072, 768][768, 1]cuda:0", arg294_1: "f32[3072][1]cuda:0", arg295_1: "f32[768, 3072][3072, 1]cuda:0", arg296_1: "f32[768][1]cuda:0", arg297_1: "f32[768][1]cuda:0", arg298_1: "f32[768][1]cuda:0", arg299_1: "f32[50304, 768][768, 1]cuda:0", arg300_1: "f32[768][1]cuda:0", arg301_1: "f32[768][1]cuda:0", arg302_1: "f32[2304, 768][768, 1]cuda:0", arg303_1: "f32[2304][1]cuda:0", arg304_1: "f32[768, 768][768, 1]cuda:0", arg305_1: "f32[768][1]cuda:0", arg306_1: "f32[768][1]cuda:0", arg307_1: "f32[768][1]cuda:0", arg308_1: "f32[3072, 768][768, 1]cuda:0", arg309_1: "f32[3072][1]cuda:0", arg310_1: "f32[768, 3072][3072, 1]cuda:0", arg311_1: "f32[768][1]cuda:0", arg312_1: "f32[768][1]cuda:0", arg313_1: "f32[768][1]cuda:0", arg314_1: "f32[2304, 768][768, 1]cuda:0", arg315_1: "f32[2304][1]cuda:0", arg316_1: "f32[768, 768][768, 1]cuda:0", arg317_1: "f32[768][1]cuda:0", arg318_1: "f32[768][1]cuda:0", arg319_1: "f32[768][1]cuda:0", arg320_1: "f32[3072, 768][768, 1]cuda:0", arg321_1: "f32[3072][1]cuda:0", arg322_1: "f32[768, 3072][3072, 1]cuda:0", arg323_1: "f32[768][1]cuda:0", arg324_1: "f32[768][1]cuda:0", arg325_1: "f32[768][1]cuda:0", arg326_1: "f32[2304, 768][768, 1]cuda:0", arg327_1: "f32[2304][1]cuda:0", arg328_1: "f32[768, 768][768, 1]cuda:0", arg329_1: "f32[768][1]cuda:0", arg330_1: "f32[768][1]cuda:0", arg331_1: "f32[768][1]cuda:0", arg332_1: "f32[3072, 768][768, 1]cuda:0", arg333_1: "f32[3072][1]cuda:0", arg334_1: "f32[768, 3072][3072, 1]cuda:0", arg335_1: "f32[768][1]cuda:0", arg336_1: "f32[768][1]cuda:0", arg337_1: "f32[768][1]cuda:0", arg338_1: "f32[2304, 768][768, 1]cuda:0", arg339_1: "f32[2304][1]cuda:0", arg340_1: "f32[768, 768][768, 1]cuda:0", arg341_1: "f32[768][1]cuda:0", arg342_1: "f32[768][1]cuda:0", arg343_1: "f32[768][1]cuda:0", arg344_1: "f32[3072, 768][768, 1]cuda:0", arg345_1: "f32[3072][1]cuda:0", arg346_1: "f32[768, 3072][3072, 1]cuda:0", arg347_1: "f32[768][1]cuda:0", arg348_1: 
"f32[768][1]cuda:0", arg349_1: "f32[768][1]cuda:0", arg350_1: "f32[2304, 768][768, 1]cuda:0", arg351_1: "f32[2304][1]cuda:0", arg352_1: "f32[768, 768][768, 1]cuda:0", arg353_1: "f32[768][1]cuda:0", arg354_1: "f32[768][1]cuda:0", arg355_1: "f32[768][1]cuda:0", arg356_1: "f32[3072, 768][768, 1]cuda:0", arg357_1: "f32[3072][1]cuda:0", arg358_1: "f32[768, 3072][3072, 1]cuda:0", arg359_1: "f32[768][1]cuda:0", arg360_1: "f32[768][1]cuda:0", arg361_1: "f32[768][1]cuda:0", arg362_1: "f32[2304, 768][768, 1]cuda:0", arg363_1: "f32[2304][1]cuda:0", arg364_1: "f32[768, 768][768, 1]cuda:0", arg365_1: "f32[768][1]cuda:0", arg366_1: "f32[768][1]cuda:0", arg367_1: "f32[768][1]cuda:0", arg368_1: "f32[3072, 768][768, 1]cuda:0", arg369_1: "f32[3072][1]cuda:0", arg370_1: "f32[768, 3072][3072, 1]cuda:0", arg371_1: "f32[768][1]cuda:0", arg372_1: "f32[768][1]cuda:0", arg373_1: "f32[768][1]cuda:0", arg374_1: "f32[2304, 768][768, 1]cuda:0", arg375_1: "f32[2304][1]cuda:0", arg376_1: "f32[768, 768][768, 1]cuda:0", arg377_1: "f32[768][1]cuda:0", arg378_1: "f32[768][1]cuda:0", arg379_1: "f32[768][1]cuda:0", arg380_1: "f32[3072, 768][768, 1]cuda:0", arg381_1: "f32[3072][1]cuda:0", arg382_1: "f32[768, 3072][3072, 1]cuda:0", arg383_1: "f32[768][1]cuda:0", arg384_1: "f32[768][1]cuda:0", arg385_1: "f32[768][1]cuda:0", arg386_1: "f32[2304, 768][768, 1]cuda:0", arg387_1: "f32[2304][1]cuda:0", arg388_1: "f32[768, 768][768, 1]cuda:0", arg389_1: "f32[768][1]cuda:0", arg390_1: "f32[768][1]cuda:0", arg391_1: "f32[768][1]cuda:0", arg392_1: "f32[3072, 768][768, 1]cuda:0", arg393_1: "f32[3072][1]cuda:0", arg394_1: "f32[768, 3072][3072, 1]cuda:0", arg395_1: "f32[768][1]cuda:0", arg396_1: "f32[768][1]cuda:0", arg397_1: "f32[768][1]cuda:0", arg398_1: "f32[2304, 768][768, 1]cuda:0", arg399_1: "f32[2304][1]cuda:0", arg400_1: "f32[768, 768][768, 1]cuda:0", arg401_1: "f32[768][1]cuda:0", arg402_1: "f32[768][1]cuda:0", arg403_1: "f32[768][1]cuda:0", arg404_1: "f32[3072, 768][768, 1]cuda:0", arg405_1: "f32[3072][1]cuda:0", arg406_1: "f32[768, 3072][3072, 1]cuda:0", arg407_1: "f32[768][1]cuda:0", arg408_1: "f32[768][1]cuda:0", arg409_1: "f32[768][1]cuda:0", arg410_1: "f32[2304, 768][768, 1]cuda:0", arg411_1: "f32[2304][1]cuda:0", arg412_1: "f32[768, 768][768, 1]cuda:0", arg413_1: "f32[768][1]cuda:0", arg414_1: "f32[768][1]cuda:0", arg415_1: "f32[768][1]cuda:0", arg416_1: "f32[3072, 768][768, 1]cuda:0", arg417_1: "f32[3072][1]cuda:0", arg418_1: "f32[768, 3072][3072, 1]cuda:0", arg419_1: "f32[768][1]cuda:0", arg420_1: "f32[768][1]cuda:0", arg421_1: "f32[768][1]cuda:0", arg422_1: "f32[2304, 768][768, 1]cuda:0", arg423_1: "f32[2304][1]cuda:0", arg424_1: "f32[768, 768][768, 1]cuda:0", arg425_1: "f32[768][1]cuda:0", arg426_1: "f32[768][1]cuda:0", arg427_1: "f32[768][1]cuda:0", arg428_1: "f32[3072, 768][768, 1]cuda:0", arg429_1: "f32[3072][1]cuda:0", arg430_1: "f32[768, 3072][3072, 1]cuda:0", arg431_1: "f32[768][1]cuda:0", arg432_1: "f32[768][1]cuda:0", arg433_1: "f32[768][1]cuda:0", arg434_1: "f32[2304, 768][768, 1]cuda:0", arg435_1: "f32[2304][1]cuda:0", arg436_1: "f32[768, 768][768, 1]cuda:0", arg437_1: "f32[768][1]cuda:0", arg438_1: "f32[768][1]cuda:0", arg439_1: "f32[768][1]cuda:0", arg440_1: "f32[3072, 768][768, 1]cuda:0", arg441_1: "f32[3072][1]cuda:0", arg442_1: "f32[768, 3072][3072, 1]cuda:0", arg443_1: "f32[768][1]cuda:0", arg444_1: "f32[768][1]cuda:0", arg445_1: "f32[768][1]cuda:0", arg446_1: "f32[50304, 768][768, 1]cuda:0", arg447_1: "f32[768][1]cuda:0", arg448_1: "f32[768][1]cuda:0", arg449_1: "f32[2304, 768][768, 1]cuda:0", arg450_1: 
"f32[2304][1]cuda:0", arg451_1: "f32[768, 768][768, 1]cuda:0", arg452_1: "f32[768][1]cuda:0", arg453_1: "f32[768][1]cuda:0", arg454_1: "f32[768][1]cuda:0", arg455_1: "f32[3072, 768][768, 1]cuda:0", arg456_1: "f32[3072][1]cuda:0", arg457_1: "f32[768, 3072][3072, 1]cuda:0", arg458_1: "f32[768][1]cuda:0", arg459_1: "f32[768][1]cuda:0", arg460_1: "f32[768][1]cuda:0", arg461_1: "f32[2304, 768][768, 1]cuda:0", arg462_1: "f32[2304][1]cuda:0", arg463_1: "f32[768, 768][768, 1]cuda:0", arg464_1: "f32[768][1]cuda:0", arg465_1: "f32[768][1]cuda:0", arg466_1: "f32[768][1]cuda:0", arg467_1: "f32[3072, 768][768, 1]cuda:0", arg468_1: "f32[3072][1]cuda:0", arg469_1: "f32[768, 3072][3072, 1]cuda:0", arg470_1: "f32[768][1]cuda:0", arg471_1: "f32[768][1]cuda:0", arg472_1: "f32[768][1]cuda:0", arg473_1: "f32[2304, 768][768, 1]cuda:0", arg474_1: "f32[2304][1]cuda:0", arg475_1: "f32[768, 768][768, 1]cuda:0", arg476_1: "f32[768][1]cuda:0", arg477_1: "f32[768][1]cuda:0", arg478_1: "f32[768][1]cuda:0", arg479_1: "f32[3072, 768][768, 1]cuda:0", arg480_1: "f32[3072][1]cuda:0", arg481_1: "f32[768, 3072][3072, 1]cuda:0", arg482_1: "f32[768][1]cuda:0", arg483_1: "f32[768][1]cuda:0", arg484_1: "f32[768][1]cuda:0", arg485_1: "f32[2304, 768][768, 1]cuda:0", arg486_1: "f32[2304][1]cuda:0", arg487_1: "f32[768, 768][768, 1]cuda:0", arg488_1: "f32[768][1]cuda:0", arg489_1: "f32[768][1]cuda:0", arg490_1: "f32[768][1]cuda:0", arg491_1: "f32[3072, 768][768, 1]cuda:0", arg492_1: "f32[3072][1]cuda:0", arg493_1: "f32[768, 3072][3072, 1]cuda:0", arg494_1: "f32[768][1]cuda:0", arg495_1: "f32[768][1]cuda:0", arg496_1: "f32[768][1]cuda:0", arg497_1: "f32[2304, 768][768, 1]cuda:0", arg498_1: "f32[2304][1]cuda:0", arg499_1: "f32[768, 768][768, 1]cuda:0", arg500_1: "f32[768][1]cuda:0", arg501_1: "f32[768][1]cuda:0", arg502_1: "f32[768][1]cuda:0", arg503_1: "f32[3072, 768][768, 1]cuda:0", arg504_1: "f32[3072][1]cuda:0", arg505_1: "f32[768, 3072][3072, 1]cuda:0", arg506_1: "f32[768][1]cuda:0", arg507_1: "f32[768][1]cuda:0", arg508_1: "f32[768][1]cuda:0", arg509_1: "f32[2304, 768][768, 1]cuda:0", arg510_1: "f32[2304][1]cuda:0", arg511_1: "f32[768, 768][768, 1]cuda:0", arg512_1: "f32[768][1]cuda:0", arg513_1: "f32[768][1]cuda:0", arg514_1: "f32[768][1]cuda:0", arg515_1: "f32[3072, 768][768, 1]cuda:0", arg516_1: "f32[3072][1]cuda:0", arg517_1: "f32[768, 3072][3072, 1]cuda:0", arg518_1: "f32[768][1]cuda:0", arg519_1: "f32[768][1]cuda:0", arg520_1: "f32[768][1]cuda:0", arg521_1: "f32[2304, 768][768, 1]cuda:0", arg522_1: "f32[2304][1]cuda:0", arg523_1: "f32[768, 768][768, 1]cuda:0", arg524_1: "f32[768][1]cuda:0", arg525_1: "f32[768][1]cuda:0", arg526_1: "f32[768][1]cuda:0", arg527_1: "f32[3072, 768][768, 1]cuda:0", arg528_1: "f32[3072][1]cuda:0", arg529_1: "f32[768, 3072][3072, 1]cuda:0", arg530_1: "f32[768][1]cuda:0", arg531_1: "f32[768][1]cuda:0", arg532_1: "f32[768][1]cuda:0", arg533_1: "f32[2304, 768][768, 1]cuda:0", arg534_1: "f32[2304][1]cuda:0", arg535_1: "f32[768, 768][768, 1]cuda:0", arg536_1: "f32[768][1]cuda:0", arg537_1: "f32[768][1]cuda:0", arg538_1: "f32[768][1]cuda:0", arg539_1: "f32[3072, 768][768, 1]cuda:0", arg540_1: "f32[3072][1]cuda:0", arg541_1: "f32[768, 3072][3072, 1]cuda:0", arg542_1: "f32[768][1]cuda:0", arg543_1: "f32[768][1]cuda:0", arg544_1: "f32[768][1]cuda:0", arg545_1: "f32[2304, 768][768, 1]cuda:0", arg546_1: "f32[2304][1]cuda:0", arg547_1: "f32[768, 768][768, 1]cuda:0", arg548_1: "f32[768][1]cuda:0", arg549_1: "f32[768][1]cuda:0", arg550_1: "f32[768][1]cuda:0", arg551_1: "f32[3072, 768][768, 1]cuda:0", arg552_1: 
"f32[3072][1]cuda:0", arg553_1: "f32[768, 3072][3072, 1]cuda:0", arg554_1: "f32[768][1]cuda:0", arg555_1: "f32[768][1]cuda:0", arg556_1: "f32[768][1]cuda:0", arg557_1: "f32[2304, 768][768, 1]cuda:0", arg558_1: "f32[2304][1]cuda:0", arg559_1: "f32[768, 768][768, 1]cuda:0", arg560_1: "f32[768][1]cuda:0", arg561_1: "f32[768][1]cuda:0", arg562_1: "f32[768][1]cuda:0", arg563_1: "f32[3072, 768][768, 1]cuda:0", arg564_1: "f32[3072][1]cuda:0", arg565_1: "f32[768, 3072][3072, 1]cuda:0", arg566_1: "f32[768][1]cuda:0", arg567_1: "f32[768][1]cuda:0", arg568_1: "f32[768][1]cuda:0", arg569_1: "f32[2304, 768][768, 1]cuda:0", arg570_1: "f32[2304][1]cuda:0", arg571_1: "f32[768, 768][768, 1]cuda:0", arg572_1: "f32[768][1]cuda:0", arg573_1: "f32[768][1]cuda:0", arg574_1: "f32[768][1]cuda:0", arg575_1: "f32[3072, 768][768, 1]cuda:0", arg576_1: "f32[3072][1]cuda:0", arg577_1: "f32[768, 3072][3072, 1]cuda:0", arg578_1: "f32[768][1]cuda:0", arg579_1: "f32[768][1]cuda:0", arg580_1: "f32[768][1]cuda:0", arg581_1: "f32[2304, 768][768, 1]cuda:0", arg582_1: "f32[2304][1]cuda:0", arg583_1: "f32[768, 768][768, 1]cuda:0", arg584_1: "f32[768][1]cuda:0", arg585_1: "f32[768][1]cuda:0", arg586_1: "f32[768][1]cuda:0", arg587_1: "f32[3072, 768][768, 1]cuda:0", arg588_1: "f32[3072][1]cuda:0", arg589_1: "f32[768, 3072][3072, 1]cuda:0", arg590_1: "f32[768][1]cuda:0", arg591_1: "f32[768][1]cuda:0", arg592_1: "f32[768][1]cuda:0", arg593_1: "f32[][]cuda:0", arg594_1: "f32[][]cuda:0", arg595_1: "f32[][]cuda:0", arg596_1: "f32[][]cuda:0", arg597_1: "f32[][]cuda:0", arg598_1: "f32[][]cuda:0", arg599_1: "f32[][]cuda:0", arg600_1: "f32[][]cuda:0", arg601_1: "f32[][]cuda:0", arg602_1: "f32[][]cuda:0", arg603_1: "f32[][]cuda:0", arg604_1: "f32[][]cuda:0", arg605_1: "f32[][]cuda:0", arg606_1: "f32[][]cuda:0", arg607_1: "f32[][]cuda:0", arg608_1: "f32[][]cuda:0", arg609_1: "f32[][]cuda:0", arg610_1: "f32[][]cuda:0", arg611_1: "f32[][]cuda:0", arg612_1: "f32[][]cuda:0", arg613_1: "f32[][]cuda:0", arg614_1: "f32[][]cuda:0", arg615_1: "f32[][]cuda:0", arg616_1: "f32[][]cuda:0", arg617_1: "f32[][]cuda:0", arg618_1: "f32[][]cuda:0", arg619_1: "f32[][]cuda:0", arg620_1: "f32[][]cuda:0", arg621_1: "f32[][]cuda:0", arg622_1: "f32[][]cuda:0", arg623_1: "f32[][]cuda:0", arg624_1: "f32[][]cuda:0", arg625_1: "f32[][]cuda:0", arg626_1: "f32[][]cuda:0", arg627_1: "f32[][]cuda:0", arg628_1: "f32[][]cuda:0", arg629_1: "f32[][]cuda:0", arg630_1: "f32[][]cuda:0", arg631_1: "f32[][]cuda:0", arg632_1: "f32[][]cuda:0", arg633_1: "f32[][]cuda:0", arg634_1: "f32[][]cuda:0", arg635_1: "f32[][]cuda:0", arg636_1: "f32[][]cuda:0", arg637_1: "f32[][]cuda:0", arg638_1: "f32[][]cuda:0", arg639_1: "f32[][]cuda:0", arg640_1: "f32[][]cuda:0", arg641_1: "f32[][]cuda:0", arg642_1: "f32[][]cuda:0", arg643_1: "f32[][]cuda:0", arg644_1: "f32[][]cuda:0", arg645_1: "f32[][]cuda:0", arg646_1: "f32[][]cuda:0", arg647_1: "f32[][]cuda:0", arg648_1: "f32[][]cuda:0", arg649_1: "f32[][]cuda:0", arg650_1: "f32[][]cuda:0", arg651_1: "f32[][]cuda:0", arg652_1: "f32[][]cuda:0", arg653_1: "f32[][]cuda:0", arg654_1: "f32[][]cuda:0", arg655_1: "f32[][]cuda:0", arg656_1: "f32[][]cuda:0", arg657_1: "f32[][]cuda:0", arg658_1: "f32[][]cuda:0", arg659_1: "f32[][]cuda:0", arg660_1: "f32[][]cuda:0", arg661_1: "f32[][]cuda:0", arg662_1: "f32[][]cuda:0", arg663_1: "f32[][]cuda:0", arg664_1: "f32[][]cuda:0", arg665_1: "f32[][]cuda:0", arg666_1: "f32[][]cuda:0", arg667_1: "f32[][]cuda:0", arg668_1: "f32[][]cuda:0", arg669_1: "f32[][]cuda:0", arg670_1: "f32[][]cuda:0", arg671_1: "f32[][]cuda:0", arg672_1: 
"f32[][]cuda:0", arg673_1: "f32[][]cuda:0", arg674_1: "f32[][]cuda:0", arg675_1: "f32[][]cuda:0", arg676_1: "f32[][]cuda:0", arg677_1: "f32[][]cuda:0", arg678_1: "f32[][]cuda:0", arg679_1: "f32[][]cuda:0", arg680_1: "f32[][]cuda:0", arg681_1: "f32[][]cuda:0", arg682_1: "f32[][]cuda:0", arg683_1: "f32[][]cuda:0", arg684_1: "f32[][]cuda:0", arg685_1: "f32[][]cuda:0", arg686_1: "f32[][]cuda:0", arg687_1: "f32[][]cuda:0", arg688_1: "f32[][]cuda:0", arg689_1: "f32[][]cuda:0", arg690_1: "f32[][]cuda:0", arg691_1: "f32[][]cuda:0", arg692_1: "f32[][]cuda:0", arg693_1: "f32[][]cuda:0", arg694_1: "f32[][]cuda:0", arg695_1: "f32[][]cuda:0", arg696_1: "f32[][]cuda:0", arg697_1: "f32[][]cuda:0", arg698_1: "f32[][]cuda:0", arg699_1: "f32[][]cuda:0", arg700_1: "f32[][]cuda:0", arg701_1: "f32[][]cuda:0", arg702_1: "f32[][]cuda:0", arg703_1: "f32[][]cuda:0", arg704_1: "f32[][]cuda:0", arg705_1: "f32[][]cuda:0", arg706_1: "f32[][]cuda:0", arg707_1: "f32[][]cuda:0", arg708_1: "f32[][]cuda:0", arg709_1: "f32[][]cuda:0", arg710_1: "f32[][]cuda:0", arg711_1: "f32[][]cuda:0", arg712_1: "f32[][]cuda:0", arg713_1: "f32[][]cuda:0", arg714_1: "f32[][]cuda:0", arg715_1: "f32[][]cuda:0", arg716_1: "f32[][]cuda:0", arg717_1: "f32[][]cuda:0", arg718_1: "f32[][]cuda:0", arg719_1: "f32[][]cuda:0", arg720_1: "f32[][]cuda:0", arg721_1: "f32[][]cuda:0", arg722_1: "f32[][]cuda:0", arg723_1: "f32[][]cuda:0", arg724_1: "f32[][]cuda:0", arg725_1: "f32[][]cuda:0", arg726_1: "f32[][]cuda:0", arg727_1: "f32[][]cuda:0", arg728_1: "f32[][]cuda:0", arg729_1: "f32[][]cuda:0", arg730_1: "f32[][]cuda:0", arg731_1: "f32[][]cuda:0", arg732_1: "f32[][]cuda:0", arg733_1: "f32[][]cuda:0", arg734_1: "f32[][]cuda:0", arg735_1: "f32[][]cuda:0", arg736_1: "f32[][]cuda:0", arg737_1: "f32[][]cuda:0", arg738_1: "f32[][]cuda:0", arg739_1: "f32[][]cuda:0"): + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:529 in _multi_tensor_adam, code: torch._foreach_add_(device_state_steps, 1) + _foreach_add = torch.ops.aten._foreach_add_.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1); arg593_1 = arg148_1 = arg594_1 = arg595_1 = arg596_1 = 
arg597_1 = arg598_1 = arg599_1 = arg600_1 = arg601_1 = arg602_1 = arg603_1 = arg604_1 = arg605_1 = arg606_1 = arg607_1 = arg608_1 = arg609_1 = arg610_1 = arg611_1 = arg612_1 = arg613_1 = arg614_1 = arg615_1 = arg616_1 = arg617_1 = arg618_1 = arg619_1 = arg620_1 = arg621_1 = arg622_1 = arg623_1 = arg624_1 = arg625_1 = arg626_1 = arg627_1 = arg628_1 = arg629_1 = arg630_1 = arg631_1 = arg632_1 = arg633_1 = arg634_1 = arg635_1 = arg636_1 = arg637_1 = arg638_1 = arg639_1 = arg640_1 = arg641_1 = arg642_1 = arg643_1 = arg644_1 = arg645_1 = arg646_1 = arg647_1 = arg648_1 = arg649_1 = arg650_1 = arg651_1 = arg652_1 = arg653_1 = arg654_1 = arg655_1 = arg656_1 = arg657_1 = arg658_1 = arg659_1 = arg660_1 = arg661_1 = arg662_1 = arg663_1 = arg664_1 = arg665_1 = arg666_1 = arg667_1 = arg668_1 = arg669_1 = arg670_1 = arg671_1 = arg672_1 = arg673_1 = arg674_1 = arg675_1 = arg676_1 = arg677_1 = arg678_1 = arg679_1 = arg680_1 = arg681_1 = arg682_1 = arg683_1 = arg684_1 = arg685_1 = arg686_1 = arg687_1 = arg688_1 = arg689_1 = arg690_1 = arg691_1 = arg692_1 = arg693_1 = arg694_1 = arg695_1 = arg696_1 = arg697_1 = arg698_1 = arg699_1 = arg700_1 = arg701_1 = arg702_1 = arg703_1 = arg704_1 = arg705_1 = arg706_1 = arg707_1 = arg708_1 = arg709_1 = arg710_1 = arg711_1 = arg712_1 = arg713_1 = arg714_1 = arg715_1 = arg716_1 = arg717_1 = arg718_1 = arg719_1 = arg720_1 = arg721_1 = arg722_1 = arg723_1 = arg724_1 = arg725_1 = arg726_1 = arg727_1 = arg728_1 = arg729_1 = arg730_1 = arg731_1 = arg732_1 = arg733_1 = arg734_1 = arg735_1 = arg736_1 = arg737_1 = arg738_1 = arg739_1 = None + getitem: "f32[][]cuda:0" = _foreach_add[0] + getitem_1: "f32[][]cuda:0" = _foreach_add[1] + getitem_2: "f32[][]cuda:0" = _foreach_add[2] + getitem_3: "f32[][]cuda:0" = _foreach_add[3] + getitem_4: "f32[][]cuda:0" = _foreach_add[4] + getitem_5: "f32[][]cuda:0" = _foreach_add[5] + getitem_6: "f32[][]cuda:0" = _foreach_add[6] + getitem_7: "f32[][]cuda:0" = _foreach_add[7] + getitem_8: "f32[][]cuda:0" = _foreach_add[8] + getitem_9: "f32[][]cuda:0" = _foreach_add[9] + getitem_10: "f32[][]cuda:0" = _foreach_add[10] + getitem_11: "f32[][]cuda:0" = _foreach_add[11] + getitem_12: "f32[][]cuda:0" = _foreach_add[12] + getitem_13: "f32[][]cuda:0" = _foreach_add[13] + getitem_14: "f32[][]cuda:0" = _foreach_add[14] + getitem_15: "f32[][]cuda:0" = _foreach_add[15] + getitem_16: "f32[][]cuda:0" = _foreach_add[16] + getitem_17: "f32[][]cuda:0" = _foreach_add[17] + getitem_18: "f32[][]cuda:0" = _foreach_add[18] + getitem_19: "f32[][]cuda:0" = _foreach_add[19] + getitem_20: "f32[][]cuda:0" = _foreach_add[20] + getitem_21: "f32[][]cuda:0" = _foreach_add[21] + getitem_22: "f32[][]cuda:0" = _foreach_add[22] + getitem_23: "f32[][]cuda:0" = _foreach_add[23] + getitem_24: "f32[][]cuda:0" = _foreach_add[24] + getitem_25: "f32[][]cuda:0" = _foreach_add[25] + getitem_26: "f32[][]cuda:0" = _foreach_add[26] + getitem_27: "f32[][]cuda:0" = _foreach_add[27] + getitem_28: "f32[][]cuda:0" = _foreach_add[28] + getitem_29: "f32[][]cuda:0" = _foreach_add[29] + getitem_30: "f32[][]cuda:0" = _foreach_add[30] + getitem_31: "f32[][]cuda:0" = _foreach_add[31] + getitem_32: "f32[][]cuda:0" = _foreach_add[32] + getitem_33: "f32[][]cuda:0" = _foreach_add[33] + getitem_34: "f32[][]cuda:0" = _foreach_add[34] + getitem_35: "f32[][]cuda:0" = _foreach_add[35] + getitem_36: "f32[][]cuda:0" = _foreach_add[36] + getitem_37: "f32[][]cuda:0" = _foreach_add[37] + getitem_38: "f32[][]cuda:0" = _foreach_add[38] + getitem_39: "f32[][]cuda:0" = _foreach_add[39] + getitem_40: "f32[][]cuda:0" = 
_foreach_add[40] + getitem_41: "f32[][]cuda:0" = _foreach_add[41] + getitem_42: "f32[][]cuda:0" = _foreach_add[42] + getitem_43: "f32[][]cuda:0" = _foreach_add[43] + getitem_44: "f32[][]cuda:0" = _foreach_add[44] + getitem_45: "f32[][]cuda:0" = _foreach_add[45] + getitem_46: "f32[][]cuda:0" = _foreach_add[46] + getitem_47: "f32[][]cuda:0" = _foreach_add[47] + getitem_48: "f32[][]cuda:0" = _foreach_add[48] + getitem_49: "f32[][]cuda:0" = _foreach_add[49] + getitem_50: "f32[][]cuda:0" = _foreach_add[50] + getitem_51: "f32[][]cuda:0" = _foreach_add[51] + getitem_52: "f32[][]cuda:0" = _foreach_add[52] + getitem_53: "f32[][]cuda:0" = _foreach_add[53] + getitem_54: "f32[][]cuda:0" = _foreach_add[54] + getitem_55: "f32[][]cuda:0" = _foreach_add[55] + getitem_56: "f32[][]cuda:0" = _foreach_add[56] + getitem_57: "f32[][]cuda:0" = _foreach_add[57] + getitem_58: "f32[][]cuda:0" = _foreach_add[58] + getitem_59: "f32[][]cuda:0" = _foreach_add[59] + getitem_60: "f32[][]cuda:0" = _foreach_add[60] + getitem_61: "f32[][]cuda:0" = _foreach_add[61] + getitem_62: "f32[][]cuda:0" = _foreach_add[62] + getitem_63: "f32[][]cuda:0" = _foreach_add[63] + getitem_64: "f32[][]cuda:0" = _foreach_add[64] + getitem_65: "f32[][]cuda:0" = _foreach_add[65] + getitem_66: "f32[][]cuda:0" = _foreach_add[66] + getitem_67: "f32[][]cuda:0" = _foreach_add[67] + getitem_68: "f32[][]cuda:0" = _foreach_add[68] + getitem_69: "f32[][]cuda:0" = _foreach_add[69] + getitem_70: "f32[][]cuda:0" = _foreach_add[70] + getitem_71: "f32[][]cuda:0" = _foreach_add[71] + getitem_72: "f32[][]cuda:0" = _foreach_add[72] + getitem_73: "f32[][]cuda:0" = _foreach_add[73] + getitem_74: "f32[][]cuda:0" = _foreach_add[74] + getitem_75: "f32[][]cuda:0" = _foreach_add[75] + getitem_76: "f32[][]cuda:0" = _foreach_add[76] + getitem_77: "f32[][]cuda:0" = _foreach_add[77] + getitem_78: "f32[][]cuda:0" = _foreach_add[78] + getitem_79: "f32[][]cuda:0" = _foreach_add[79] + getitem_80: "f32[][]cuda:0" = _foreach_add[80] + getitem_81: "f32[][]cuda:0" = _foreach_add[81] + getitem_82: "f32[][]cuda:0" = _foreach_add[82] + getitem_83: "f32[][]cuda:0" = _foreach_add[83] + getitem_84: "f32[][]cuda:0" = _foreach_add[84] + getitem_85: "f32[][]cuda:0" = _foreach_add[85] + getitem_86: "f32[][]cuda:0" = _foreach_add[86] + getitem_87: "f32[][]cuda:0" = _foreach_add[87] + getitem_88: "f32[][]cuda:0" = _foreach_add[88] + getitem_89: "f32[][]cuda:0" = _foreach_add[89] + getitem_90: "f32[][]cuda:0" = _foreach_add[90] + getitem_91: "f32[][]cuda:0" = _foreach_add[91] + getitem_92: "f32[][]cuda:0" = _foreach_add[92] + getitem_93: "f32[][]cuda:0" = _foreach_add[93] + getitem_94: "f32[][]cuda:0" = _foreach_add[94] + getitem_95: "f32[][]cuda:0" = _foreach_add[95] + getitem_96: "f32[][]cuda:0" = _foreach_add[96] + getitem_97: "f32[][]cuda:0" = _foreach_add[97] + getitem_98: "f32[][]cuda:0" = _foreach_add[98] + getitem_99: "f32[][]cuda:0" = _foreach_add[99] + getitem_100: "f32[][]cuda:0" = _foreach_add[100] + getitem_101: "f32[][]cuda:0" = _foreach_add[101] + getitem_102: "f32[][]cuda:0" = _foreach_add[102] + getitem_103: "f32[][]cuda:0" = _foreach_add[103] + getitem_104: "f32[][]cuda:0" = _foreach_add[104] + getitem_105: "f32[][]cuda:0" = _foreach_add[105] + getitem_106: "f32[][]cuda:0" = _foreach_add[106] + getitem_107: "f32[][]cuda:0" = _foreach_add[107] + getitem_108: "f32[][]cuda:0" = _foreach_add[108] + getitem_109: "f32[][]cuda:0" = _foreach_add[109] + getitem_110: "f32[][]cuda:0" = _foreach_add[110] + getitem_111: "f32[][]cuda:0" = _foreach_add[111] + getitem_112: "f32[][]cuda:0" = 
_foreach_add[112] + getitem_113: "f32[][]cuda:0" = _foreach_add[113] + getitem_114: "f32[][]cuda:0" = _foreach_add[114] + getitem_115: "f32[][]cuda:0" = _foreach_add[115] + getitem_116: "f32[][]cuda:0" = _foreach_add[116] + getitem_117: "f32[][]cuda:0" = _foreach_add[117] + getitem_118: "f32[][]cuda:0" = _foreach_add[118] + getitem_119: "f32[][]cuda:0" = _foreach_add[119] + getitem_120: "f32[][]cuda:0" = _foreach_add[120] + getitem_121: "f32[][]cuda:0" = _foreach_add[121] + getitem_122: "f32[][]cuda:0" = _foreach_add[122] + getitem_123: "f32[][]cuda:0" = _foreach_add[123] + getitem_124: "f32[][]cuda:0" = _foreach_add[124] + getitem_125: "f32[][]cuda:0" = _foreach_add[125] + getitem_126: "f32[][]cuda:0" = _foreach_add[126] + getitem_127: "f32[][]cuda:0" = _foreach_add[127] + getitem_128: "f32[][]cuda:0" = _foreach_add[128] + getitem_129: "f32[][]cuda:0" = _foreach_add[129] + getitem_130: "f32[][]cuda:0" = _foreach_add[130] + getitem_131: "f32[][]cuda:0" = _foreach_add[131] + getitem_132: "f32[][]cuda:0" = _foreach_add[132] + getitem_133: "f32[][]cuda:0" = _foreach_add[133] + getitem_134: "f32[][]cuda:0" = _foreach_add[134] + getitem_135: "f32[][]cuda:0" = _foreach_add[135] + getitem_136: "f32[][]cuda:0" = _foreach_add[136] + getitem_137: "f32[][]cuda:0" = _foreach_add[137] + getitem_138: "f32[][]cuda:0" = _foreach_add[138] + getitem_139: "f32[][]cuda:0" = _foreach_add[139] + getitem_140: "f32[][]cuda:0" = _foreach_add[140] + getitem_141: "f32[][]cuda:0" = _foreach_add[141] + getitem_142: "f32[][]cuda:0" = _foreach_add[142] + getitem_143: "f32[][]cuda:0" = _foreach_add[143] + getitem_144: "f32[][]cuda:0" = _foreach_add[144] + getitem_145: "f32[][]cuda:0" = _foreach_add[145] + getitem_146: "f32[][]cuda:0" = _foreach_add[146] + getitem_147: "f32[][]cuda:0" = _foreach_add[147]; _foreach_add = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:541 in _multi_tensor_adam, code: torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1) + _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, 
arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1]) + getitem_148: "f32[50304, 768][768, 1]cuda:0" = _foreach_sub[0] + getitem_149: "f32[1024, 768][768, 1]cuda:0" = _foreach_sub[1] + getitem_150: "f32[768][1]cuda:0" = _foreach_sub[2] + getitem_151: "f32[768][1]cuda:0" = _foreach_sub[3] + getitem_152: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[4] + getitem_153: "f32[2304][1]cuda:0" = _foreach_sub[5] + getitem_154: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[6] + getitem_155: "f32[768][1]cuda:0" = _foreach_sub[7] + getitem_156: "f32[768][1]cuda:0" = _foreach_sub[8] + getitem_157: "f32[768][1]cuda:0" = _foreach_sub[9] + getitem_158: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[10] + getitem_159: "f32[3072][1]cuda:0" = _foreach_sub[11] + getitem_160: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[12] + getitem_161: "f32[768][1]cuda:0" = _foreach_sub[13] + getitem_162: "f32[768][1]cuda:0" = _foreach_sub[14] + getitem_163: "f32[768][1]cuda:0" = _foreach_sub[15] + getitem_164: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[16] + getitem_165: "f32[2304][1]cuda:0" = _foreach_sub[17] + getitem_166: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[18] + getitem_167: "f32[768][1]cuda:0" = _foreach_sub[19] + getitem_168: "f32[768][1]cuda:0" = _foreach_sub[20] + getitem_169: "f32[768][1]cuda:0" = _foreach_sub[21] + getitem_170: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[22] + getitem_171: "f32[3072][1]cuda:0" = _foreach_sub[23] + getitem_172: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[24] + getitem_173: "f32[768][1]cuda:0" = _foreach_sub[25] + getitem_174: "f32[768][1]cuda:0" = _foreach_sub[26] + getitem_175: "f32[768][1]cuda:0" = _foreach_sub[27] + getitem_176: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[28] + getitem_177: "f32[2304][1]cuda:0" = _foreach_sub[29] + getitem_178: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[30] + getitem_179: "f32[768][1]cuda:0" = _foreach_sub[31] + getitem_180: "f32[768][1]cuda:0" = _foreach_sub[32] + getitem_181: "f32[768][1]cuda:0" = _foreach_sub[33] + getitem_182: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[34] + getitem_183: "f32[3072][1]cuda:0" = _foreach_sub[35] + getitem_184: "f32[768, 
3072][3072, 1]cuda:0" = _foreach_sub[36] + getitem_185: "f32[768][1]cuda:0" = _foreach_sub[37] + getitem_186: "f32[768][1]cuda:0" = _foreach_sub[38] + getitem_187: "f32[768][1]cuda:0" = _foreach_sub[39] + getitem_188: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[40] + getitem_189: "f32[2304][1]cuda:0" = _foreach_sub[41] + getitem_190: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[42] + getitem_191: "f32[768][1]cuda:0" = _foreach_sub[43] + getitem_192: "f32[768][1]cuda:0" = _foreach_sub[44] + getitem_193: "f32[768][1]cuda:0" = _foreach_sub[45] + getitem_194: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[46] + getitem_195: "f32[3072][1]cuda:0" = _foreach_sub[47] + getitem_196: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[48] + getitem_197: "f32[768][1]cuda:0" = _foreach_sub[49] + getitem_198: "f32[768][1]cuda:0" = _foreach_sub[50] + getitem_199: "f32[768][1]cuda:0" = _foreach_sub[51] + getitem_200: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[52] + getitem_201: "f32[2304][1]cuda:0" = _foreach_sub[53] + getitem_202: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[54] + getitem_203: "f32[768][1]cuda:0" = _foreach_sub[55] + getitem_204: "f32[768][1]cuda:0" = _foreach_sub[56] + getitem_205: "f32[768][1]cuda:0" = _foreach_sub[57] + getitem_206: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[58] + getitem_207: "f32[3072][1]cuda:0" = _foreach_sub[59] + getitem_208: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[60] + getitem_209: "f32[768][1]cuda:0" = _foreach_sub[61] + getitem_210: "f32[768][1]cuda:0" = _foreach_sub[62] + getitem_211: "f32[768][1]cuda:0" = _foreach_sub[63] + getitem_212: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[64] + getitem_213: "f32[2304][1]cuda:0" = _foreach_sub[65] + getitem_214: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[66] + getitem_215: "f32[768][1]cuda:0" = _foreach_sub[67] + getitem_216: "f32[768][1]cuda:0" = _foreach_sub[68] + getitem_217: "f32[768][1]cuda:0" = _foreach_sub[69] + getitem_218: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[70] + getitem_219: "f32[3072][1]cuda:0" = _foreach_sub[71] + getitem_220: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[72] + getitem_221: "f32[768][1]cuda:0" = _foreach_sub[73] + getitem_222: "f32[768][1]cuda:0" = _foreach_sub[74] + getitem_223: "f32[768][1]cuda:0" = _foreach_sub[75] + getitem_224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[76] + getitem_225: "f32[2304][1]cuda:0" = _foreach_sub[77] + getitem_226: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[78] + getitem_227: "f32[768][1]cuda:0" = _foreach_sub[79] + getitem_228: "f32[768][1]cuda:0" = _foreach_sub[80] + getitem_229: "f32[768][1]cuda:0" = _foreach_sub[81] + getitem_230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[82] + getitem_231: "f32[3072][1]cuda:0" = _foreach_sub[83] + getitem_232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[84] + getitem_233: "f32[768][1]cuda:0" = _foreach_sub[85] + getitem_234: "f32[768][1]cuda:0" = _foreach_sub[86] + getitem_235: "f32[768][1]cuda:0" = _foreach_sub[87] + getitem_236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[88] + getitem_237: "f32[2304][1]cuda:0" = _foreach_sub[89] + getitem_238: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[90] + getitem_239: "f32[768][1]cuda:0" = _foreach_sub[91] + getitem_240: "f32[768][1]cuda:0" = _foreach_sub[92] + getitem_241: "f32[768][1]cuda:0" = _foreach_sub[93] + getitem_242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[94] + getitem_243: "f32[3072][1]cuda:0" = _foreach_sub[95] + getitem_244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[96] + getitem_245: "f32[768][1]cuda:0" = 
_foreach_sub[97] + getitem_246: "f32[768][1]cuda:0" = _foreach_sub[98] + getitem_247: "f32[768][1]cuda:0" = _foreach_sub[99] + getitem_248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[100] + getitem_249: "f32[2304][1]cuda:0" = _foreach_sub[101] + getitem_250: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[102] + getitem_251: "f32[768][1]cuda:0" = _foreach_sub[103] + getitem_252: "f32[768][1]cuda:0" = _foreach_sub[104] + getitem_253: "f32[768][1]cuda:0" = _foreach_sub[105] + getitem_254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[106] + getitem_255: "f32[3072][1]cuda:0" = _foreach_sub[107] + getitem_256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[108] + getitem_257: "f32[768][1]cuda:0" = _foreach_sub[109] + getitem_258: "f32[768][1]cuda:0" = _foreach_sub[110] + getitem_259: "f32[768][1]cuda:0" = _foreach_sub[111] + getitem_260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[112] + getitem_261: "f32[2304][1]cuda:0" = _foreach_sub[113] + getitem_262: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[114] + getitem_263: "f32[768][1]cuda:0" = _foreach_sub[115] + getitem_264: "f32[768][1]cuda:0" = _foreach_sub[116] + getitem_265: "f32[768][1]cuda:0" = _foreach_sub[117] + getitem_266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[118] + getitem_267: "f32[3072][1]cuda:0" = _foreach_sub[119] + getitem_268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[120] + getitem_269: "f32[768][1]cuda:0" = _foreach_sub[121] + getitem_270: "f32[768][1]cuda:0" = _foreach_sub[122] + getitem_271: "f32[768][1]cuda:0" = _foreach_sub[123] + getitem_272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[124] + getitem_273: "f32[2304][1]cuda:0" = _foreach_sub[125] + getitem_274: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[126] + getitem_275: "f32[768][1]cuda:0" = _foreach_sub[127] + getitem_276: "f32[768][1]cuda:0" = _foreach_sub[128] + getitem_277: "f32[768][1]cuda:0" = _foreach_sub[129] + getitem_278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[130] + getitem_279: "f32[3072][1]cuda:0" = _foreach_sub[131] + getitem_280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[132] + getitem_281: "f32[768][1]cuda:0" = _foreach_sub[133] + getitem_282: "f32[768][1]cuda:0" = _foreach_sub[134] + getitem_283: "f32[768][1]cuda:0" = _foreach_sub[135] + getitem_284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sub[136] + getitem_285: "f32[2304][1]cuda:0" = _foreach_sub[137] + getitem_286: "f32[768, 768][768, 1]cuda:0" = _foreach_sub[138] + getitem_287: "f32[768][1]cuda:0" = _foreach_sub[139] + getitem_288: "f32[768][1]cuda:0" = _foreach_sub[140] + getitem_289: "f32[768][1]cuda:0" = _foreach_sub[141] + getitem_290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sub[142] + getitem_291: "f32[3072][1]cuda:0" = _foreach_sub[143] + getitem_292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sub[144] + getitem_293: "f32[768][1]cuda:0" = _foreach_sub[145] + getitem_294: "f32[768][1]cuda:0" = _foreach_sub[146] + getitem_295: "f32[768][1]cuda:0" = _foreach_sub[147]; _foreach_sub = None + _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, 
getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None + getitem_296: "f32[50304, 768][768, 1]cuda:0" = 
_foreach_mul[0] + getitem_297: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul[1] + getitem_298: "f32[768][1]cuda:0" = _foreach_mul[2] + getitem_299: "f32[768][1]cuda:0" = _foreach_mul[3] + getitem_300: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[4] + getitem_301: "f32[2304][1]cuda:0" = _foreach_mul[5] + getitem_302: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[6] + getitem_303: "f32[768][1]cuda:0" = _foreach_mul[7] + getitem_304: "f32[768][1]cuda:0" = _foreach_mul[8] + getitem_305: "f32[768][1]cuda:0" = _foreach_mul[9] + getitem_306: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[10] + getitem_307: "f32[3072][1]cuda:0" = _foreach_mul[11] + getitem_308: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[12] + getitem_309: "f32[768][1]cuda:0" = _foreach_mul[13] + getitem_310: "f32[768][1]cuda:0" = _foreach_mul[14] + getitem_311: "f32[768][1]cuda:0" = _foreach_mul[15] + getitem_312: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[16] + getitem_313: "f32[2304][1]cuda:0" = _foreach_mul[17] + getitem_314: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[18] + getitem_315: "f32[768][1]cuda:0" = _foreach_mul[19] + getitem_316: "f32[768][1]cuda:0" = _foreach_mul[20] + getitem_317: "f32[768][1]cuda:0" = _foreach_mul[21] + getitem_318: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[22] + getitem_319: "f32[3072][1]cuda:0" = _foreach_mul[23] + getitem_320: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[24] + getitem_321: "f32[768][1]cuda:0" = _foreach_mul[25] + getitem_322: "f32[768][1]cuda:0" = _foreach_mul[26] + getitem_323: "f32[768][1]cuda:0" = _foreach_mul[27] + getitem_324: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[28] + getitem_325: "f32[2304][1]cuda:0" = _foreach_mul[29] + getitem_326: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[30] + getitem_327: "f32[768][1]cuda:0" = _foreach_mul[31] + getitem_328: "f32[768][1]cuda:0" = _foreach_mul[32] + getitem_329: "f32[768][1]cuda:0" = _foreach_mul[33] + getitem_330: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[34] + getitem_331: "f32[3072][1]cuda:0" = _foreach_mul[35] + getitem_332: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[36] + getitem_333: "f32[768][1]cuda:0" = _foreach_mul[37] + getitem_334: "f32[768][1]cuda:0" = _foreach_mul[38] + getitem_335: "f32[768][1]cuda:0" = _foreach_mul[39] + getitem_336: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[40] + getitem_337: "f32[2304][1]cuda:0" = _foreach_mul[41] + getitem_338: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[42] + getitem_339: "f32[768][1]cuda:0" = _foreach_mul[43] + getitem_340: "f32[768][1]cuda:0" = _foreach_mul[44] + getitem_341: "f32[768][1]cuda:0" = _foreach_mul[45] + getitem_342: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[46] + getitem_343: "f32[3072][1]cuda:0" = _foreach_mul[47] + getitem_344: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[48] + getitem_345: "f32[768][1]cuda:0" = _foreach_mul[49] + getitem_346: "f32[768][1]cuda:0" = _foreach_mul[50] + getitem_347: "f32[768][1]cuda:0" = _foreach_mul[51] + getitem_348: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[52] + getitem_349: "f32[2304][1]cuda:0" = _foreach_mul[53] + getitem_350: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[54] + getitem_351: "f32[768][1]cuda:0" = _foreach_mul[55] + getitem_352: "f32[768][1]cuda:0" = _foreach_mul[56] + getitem_353: "f32[768][1]cuda:0" = _foreach_mul[57] + getitem_354: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[58] + getitem_355: "f32[3072][1]cuda:0" = _foreach_mul[59] + getitem_356: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[60] + getitem_357: "f32[768][1]cuda:0" = _foreach_mul[61] + 
getitem_358: "f32[768][1]cuda:0" = _foreach_mul[62] + getitem_359: "f32[768][1]cuda:0" = _foreach_mul[63] + getitem_360: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[64] + getitem_361: "f32[2304][1]cuda:0" = _foreach_mul[65] + getitem_362: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[66] + getitem_363: "f32[768][1]cuda:0" = _foreach_mul[67] + getitem_364: "f32[768][1]cuda:0" = _foreach_mul[68] + getitem_365: "f32[768][1]cuda:0" = _foreach_mul[69] + getitem_366: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[70] + getitem_367: "f32[3072][1]cuda:0" = _foreach_mul[71] + getitem_368: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[72] + getitem_369: "f32[768][1]cuda:0" = _foreach_mul[73] + getitem_370: "f32[768][1]cuda:0" = _foreach_mul[74] + getitem_371: "f32[768][1]cuda:0" = _foreach_mul[75] + getitem_372: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[76] + getitem_373: "f32[2304][1]cuda:0" = _foreach_mul[77] + getitem_374: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[78] + getitem_375: "f32[768][1]cuda:0" = _foreach_mul[79] + getitem_376: "f32[768][1]cuda:0" = _foreach_mul[80] + getitem_377: "f32[768][1]cuda:0" = _foreach_mul[81] + getitem_378: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[82] + getitem_379: "f32[3072][1]cuda:0" = _foreach_mul[83] + getitem_380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[84] + getitem_381: "f32[768][1]cuda:0" = _foreach_mul[85] + getitem_382: "f32[768][1]cuda:0" = _foreach_mul[86] + getitem_383: "f32[768][1]cuda:0" = _foreach_mul[87] + getitem_384: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[88] + getitem_385: "f32[2304][1]cuda:0" = _foreach_mul[89] + getitem_386: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[90] + getitem_387: "f32[768][1]cuda:0" = _foreach_mul[91] + getitem_388: "f32[768][1]cuda:0" = _foreach_mul[92] + getitem_389: "f32[768][1]cuda:0" = _foreach_mul[93] + getitem_390: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[94] + getitem_391: "f32[3072][1]cuda:0" = _foreach_mul[95] + getitem_392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[96] + getitem_393: "f32[768][1]cuda:0" = _foreach_mul[97] + getitem_394: "f32[768][1]cuda:0" = _foreach_mul[98] + getitem_395: "f32[768][1]cuda:0" = _foreach_mul[99] + getitem_396: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[100] + getitem_397: "f32[2304][1]cuda:0" = _foreach_mul[101] + getitem_398: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[102] + getitem_399: "f32[768][1]cuda:0" = _foreach_mul[103] + getitem_400: "f32[768][1]cuda:0" = _foreach_mul[104] + getitem_401: "f32[768][1]cuda:0" = _foreach_mul[105] + getitem_402: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[106] + getitem_403: "f32[3072][1]cuda:0" = _foreach_mul[107] + getitem_404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[108] + getitem_405: "f32[768][1]cuda:0" = _foreach_mul[109] + getitem_406: "f32[768][1]cuda:0" = _foreach_mul[110] + getitem_407: "f32[768][1]cuda:0" = _foreach_mul[111] + getitem_408: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[112] + getitem_409: "f32[2304][1]cuda:0" = _foreach_mul[113] + getitem_410: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[114] + getitem_411: "f32[768][1]cuda:0" = _foreach_mul[115] + getitem_412: "f32[768][1]cuda:0" = _foreach_mul[116] + getitem_413: "f32[768][1]cuda:0" = _foreach_mul[117] + getitem_414: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[118] + getitem_415: "f32[3072][1]cuda:0" = _foreach_mul[119] + getitem_416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[120] + getitem_417: "f32[768][1]cuda:0" = _foreach_mul[121] + getitem_418: "f32[768][1]cuda:0" = _foreach_mul[122] + 
getitem_419: "f32[768][1]cuda:0" = _foreach_mul[123] + getitem_420: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[124] + getitem_421: "f32[2304][1]cuda:0" = _foreach_mul[125] + getitem_422: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[126] + getitem_423: "f32[768][1]cuda:0" = _foreach_mul[127] + getitem_424: "f32[768][1]cuda:0" = _foreach_mul[128] + getitem_425: "f32[768][1]cuda:0" = _foreach_mul[129] + getitem_426: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[130] + getitem_427: "f32[3072][1]cuda:0" = _foreach_mul[131] + getitem_428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[132] + getitem_429: "f32[768][1]cuda:0" = _foreach_mul[133] + getitem_430: "f32[768][1]cuda:0" = _foreach_mul[134] + getitem_431: "f32[768][1]cuda:0" = _foreach_mul[135] + getitem_432: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul[136] + getitem_433: "f32[2304][1]cuda:0" = _foreach_mul[137] + getitem_434: "f32[768, 768][768, 1]cuda:0" = _foreach_mul[138] + getitem_435: "f32[768][1]cuda:0" = _foreach_mul[139] + getitem_436: "f32[768][1]cuda:0" = _foreach_mul[140] + getitem_437: "f32[768][1]cuda:0" = _foreach_mul[141] + getitem_438: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul[142] + getitem_439: "f32[3072][1]cuda:0" = _foreach_mul[143] + getitem_440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul[144] + getitem_441: "f32[768][1]cuda:0" = _foreach_mul[145] + getitem_442: "f32[768][1]cuda:0" = _foreach_mul[146] + getitem_443: "f32[768][1]cuda:0" = _foreach_mul[147]; _foreach_mul = None + _foreach_add_1 = torch.ops.aten._foreach_add_.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, 
getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); arg299_1 = arg149_1 = arg300_1 = arg301_1 = arg302_1 = arg303_1 = arg304_1 = arg305_1 = arg306_1 = arg307_1 = arg308_1 = arg309_1 = arg310_1 = arg311_1 = arg312_1 = arg313_1 = arg314_1 = arg315_1 = arg316_1 = arg317_1 = arg318_1 = arg319_1 = arg320_1 = arg321_1 = arg322_1 = arg323_1 = arg324_1 = arg325_1 = arg326_1 = arg327_1 = arg328_1 = arg329_1 = arg330_1 = arg331_1 = arg332_1 = arg333_1 = arg334_1 = arg335_1 = arg336_1 = arg337_1 = arg338_1 = arg339_1 = arg340_1 = arg341_1 = arg342_1 = arg343_1 = arg344_1 = arg345_1 = arg346_1 = arg347_1 = arg348_1 = arg349_1 = arg350_1 = arg351_1 = arg352_1 = arg353_1 = arg354_1 = arg355_1 = arg356_1 = arg357_1 = arg358_1 = arg359_1 = arg360_1 = arg361_1 = arg362_1 = arg363_1 = arg364_1 = arg365_1 = arg366_1 = arg367_1 = arg368_1 = arg369_1 = arg370_1 = arg371_1 = arg372_1 = arg373_1 = arg374_1 = arg375_1 = arg376_1 = arg377_1 = arg378_1 = arg379_1 = arg380_1 = arg381_1 = arg382_1 = arg383_1 = arg384_1 = arg385_1 = arg386_1 = arg387_1 = arg388_1 = arg389_1 = arg390_1 = arg391_1 = arg392_1 = arg393_1 = arg394_1 = arg395_1 = arg396_1 = arg397_1 = arg398_1 = arg399_1 = arg400_1 = arg401_1 = arg402_1 = arg403_1 = arg404_1 = arg405_1 = arg406_1 = arg407_1 = arg408_1 = arg409_1 = arg410_1 = arg411_1 = arg412_1 = arg413_1 = arg414_1 = arg415_1 = arg416_1 = arg417_1 = arg418_1 = arg419_1 = arg420_1 = arg421_1 = arg422_1 = arg423_1 = arg424_1 = arg425_1 = arg426_1 = arg427_1 = arg428_1 = arg429_1 = arg430_1 = arg431_1 = arg432_1 = arg433_1 = arg434_1 = arg435_1 = arg436_1 = arg437_1 = arg438_1 = arg439_1 = arg440_1 = arg441_1 = arg442_1 = arg443_1 = arg444_1 = arg445_1 = getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = 
getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None + getitem_444: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_1[0] + getitem_445: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_1[1] + getitem_446: "f32[768][1]cuda:0" = _foreach_add_1[2] + getitem_447: "f32[768][1]cuda:0" = _foreach_add_1[3] + getitem_448: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[4] + getitem_449: "f32[2304][1]cuda:0" = _foreach_add_1[5] + getitem_450: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[6] + getitem_451: "f32[768][1]cuda:0" = _foreach_add_1[7] + getitem_452: "f32[768][1]cuda:0" = _foreach_add_1[8] + getitem_453: "f32[768][1]cuda:0" = _foreach_add_1[9] + getitem_454: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[10] + getitem_455: "f32[3072][1]cuda:0" = _foreach_add_1[11] + getitem_456: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[12] + getitem_457: "f32[768][1]cuda:0" = _foreach_add_1[13] + getitem_458: "f32[768][1]cuda:0" = _foreach_add_1[14] + getitem_459: "f32[768][1]cuda:0" = _foreach_add_1[15] + getitem_460: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[16] + getitem_461: "f32[2304][1]cuda:0" = _foreach_add_1[17] + getitem_462: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[18] + getitem_463: "f32[768][1]cuda:0" = _foreach_add_1[19] + getitem_464: "f32[768][1]cuda:0" = _foreach_add_1[20] + getitem_465: "f32[768][1]cuda:0" = _foreach_add_1[21] + getitem_466: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[22] + getitem_467: "f32[3072][1]cuda:0" = _foreach_add_1[23] + getitem_468: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[24] + getitem_469: "f32[768][1]cuda:0" = _foreach_add_1[25] + getitem_470: "f32[768][1]cuda:0" = _foreach_add_1[26] + getitem_471: "f32[768][1]cuda:0" = _foreach_add_1[27] + getitem_472: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[28] + getitem_473: "f32[2304][1]cuda:0" = _foreach_add_1[29] + getitem_474: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[30] + getitem_475: "f32[768][1]cuda:0" = _foreach_add_1[31] + getitem_476: "f32[768][1]cuda:0" = _foreach_add_1[32] + getitem_477: "f32[768][1]cuda:0" 
= _foreach_add_1[33] + getitem_478: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[34] + getitem_479: "f32[3072][1]cuda:0" = _foreach_add_1[35] + getitem_480: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[36] + getitem_481: "f32[768][1]cuda:0" = _foreach_add_1[37] + getitem_482: "f32[768][1]cuda:0" = _foreach_add_1[38] + getitem_483: "f32[768][1]cuda:0" = _foreach_add_1[39] + getitem_484: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[40] + getitem_485: "f32[2304][1]cuda:0" = _foreach_add_1[41] + getitem_486: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[42] + getitem_487: "f32[768][1]cuda:0" = _foreach_add_1[43] + getitem_488: "f32[768][1]cuda:0" = _foreach_add_1[44] + getitem_489: "f32[768][1]cuda:0" = _foreach_add_1[45] + getitem_490: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[46] + getitem_491: "f32[3072][1]cuda:0" = _foreach_add_1[47] + getitem_492: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[48] + getitem_493: "f32[768][1]cuda:0" = _foreach_add_1[49] + getitem_494: "f32[768][1]cuda:0" = _foreach_add_1[50] + getitem_495: "f32[768][1]cuda:0" = _foreach_add_1[51] + getitem_496: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[52] + getitem_497: "f32[2304][1]cuda:0" = _foreach_add_1[53] + getitem_498: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[54] + getitem_499: "f32[768][1]cuda:0" = _foreach_add_1[55] + getitem_500: "f32[768][1]cuda:0" = _foreach_add_1[56] + getitem_501: "f32[768][1]cuda:0" = _foreach_add_1[57] + getitem_502: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[58] + getitem_503: "f32[3072][1]cuda:0" = _foreach_add_1[59] + getitem_504: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[60] + getitem_505: "f32[768][1]cuda:0" = _foreach_add_1[61] + getitem_506: "f32[768][1]cuda:0" = _foreach_add_1[62] + getitem_507: "f32[768][1]cuda:0" = _foreach_add_1[63] + getitem_508: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[64] + getitem_509: "f32[2304][1]cuda:0" = _foreach_add_1[65] + getitem_510: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[66] + getitem_511: "f32[768][1]cuda:0" = _foreach_add_1[67] + getitem_512: "f32[768][1]cuda:0" = _foreach_add_1[68] + getitem_513: "f32[768][1]cuda:0" = _foreach_add_1[69] + getitem_514: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[70] + getitem_515: "f32[3072][1]cuda:0" = _foreach_add_1[71] + getitem_516: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[72] + getitem_517: "f32[768][1]cuda:0" = _foreach_add_1[73] + getitem_518: "f32[768][1]cuda:0" = _foreach_add_1[74] + getitem_519: "f32[768][1]cuda:0" = _foreach_add_1[75] + getitem_520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[76] + getitem_521: "f32[2304][1]cuda:0" = _foreach_add_1[77] + getitem_522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[78] + getitem_523: "f32[768][1]cuda:0" = _foreach_add_1[79] + getitem_524: "f32[768][1]cuda:0" = _foreach_add_1[80] + getitem_525: "f32[768][1]cuda:0" = _foreach_add_1[81] + getitem_526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[82] + getitem_527: "f32[3072][1]cuda:0" = _foreach_add_1[83] + getitem_528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[84] + getitem_529: "f32[768][1]cuda:0" = _foreach_add_1[85] + getitem_530: "f32[768][1]cuda:0" = _foreach_add_1[86] + getitem_531: "f32[768][1]cuda:0" = _foreach_add_1[87] + getitem_532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[88] + getitem_533: "f32[2304][1]cuda:0" = _foreach_add_1[89] + getitem_534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[90] + getitem_535: "f32[768][1]cuda:0" = _foreach_add_1[91] + getitem_536: "f32[768][1]cuda:0" = _foreach_add_1[92] 
+ getitem_537: "f32[768][1]cuda:0" = _foreach_add_1[93] + getitem_538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[94] + getitem_539: "f32[3072][1]cuda:0" = _foreach_add_1[95] + getitem_540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[96] + getitem_541: "f32[768][1]cuda:0" = _foreach_add_1[97] + getitem_542: "f32[768][1]cuda:0" = _foreach_add_1[98] + getitem_543: "f32[768][1]cuda:0" = _foreach_add_1[99] + getitem_544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[100] + getitem_545: "f32[2304][1]cuda:0" = _foreach_add_1[101] + getitem_546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[102] + getitem_547: "f32[768][1]cuda:0" = _foreach_add_1[103] + getitem_548: "f32[768][1]cuda:0" = _foreach_add_1[104] + getitem_549: "f32[768][1]cuda:0" = _foreach_add_1[105] + getitem_550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[106] + getitem_551: "f32[3072][1]cuda:0" = _foreach_add_1[107] + getitem_552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[108] + getitem_553: "f32[768][1]cuda:0" = _foreach_add_1[109] + getitem_554: "f32[768][1]cuda:0" = _foreach_add_1[110] + getitem_555: "f32[768][1]cuda:0" = _foreach_add_1[111] + getitem_556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[112] + getitem_557: "f32[2304][1]cuda:0" = _foreach_add_1[113] + getitem_558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[114] + getitem_559: "f32[768][1]cuda:0" = _foreach_add_1[115] + getitem_560: "f32[768][1]cuda:0" = _foreach_add_1[116] + getitem_561: "f32[768][1]cuda:0" = _foreach_add_1[117] + getitem_562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[118] + getitem_563: "f32[3072][1]cuda:0" = _foreach_add_1[119] + getitem_564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[120] + getitem_565: "f32[768][1]cuda:0" = _foreach_add_1[121] + getitem_566: "f32[768][1]cuda:0" = _foreach_add_1[122] + getitem_567: "f32[768][1]cuda:0" = _foreach_add_1[123] + getitem_568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[124] + getitem_569: "f32[2304][1]cuda:0" = _foreach_add_1[125] + getitem_570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[126] + getitem_571: "f32[768][1]cuda:0" = _foreach_add_1[127] + getitem_572: "f32[768][1]cuda:0" = _foreach_add_1[128] + getitem_573: "f32[768][1]cuda:0" = _foreach_add_1[129] + getitem_574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[130] + getitem_575: "f32[3072][1]cuda:0" = _foreach_add_1[131] + getitem_576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[132] + getitem_577: "f32[768][1]cuda:0" = _foreach_add_1[133] + getitem_578: "f32[768][1]cuda:0" = _foreach_add_1[134] + getitem_579: "f32[768][1]cuda:0" = _foreach_add_1[135] + getitem_580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_1[136] + getitem_581: "f32[2304][1]cuda:0" = _foreach_add_1[137] + getitem_582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_1[138] + getitem_583: "f32[768][1]cuda:0" = _foreach_add_1[139] + getitem_584: "f32[768][1]cuda:0" = _foreach_add_1[140] + getitem_585: "f32[768][1]cuda:0" = _foreach_add_1[141] + getitem_586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_1[142] + getitem_587: "f32[3072][1]cuda:0" = _foreach_add_1[143] + getitem_588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_1[144] + getitem_589: "f32[768][1]cuda:0" = _foreach_add_1[145] + getitem_590: "f32[768][1]cuda:0" = _foreach_add_1[146] + getitem_591: "f32[768][1]cuda:0" = _foreach_add_1[147]; _foreach_add_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:543 in _multi_tensor_adam, code: torch._foreach_mul_(device_exp_avg_sqs, beta2) + _foreach_mul_1 = 
torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999) + getitem_592: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_1[0] + getitem_593: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_1[1] + getitem_594: "f32[768][1]cuda:0" = _foreach_mul_1[2] + getitem_595: "f32[768][1]cuda:0" = _foreach_mul_1[3] + getitem_596: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[4] + getitem_597: "f32[2304][1]cuda:0" = _foreach_mul_1[5] + getitem_598: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[6] + getitem_599: "f32[768][1]cuda:0" = _foreach_mul_1[7] + getitem_600: "f32[768][1]cuda:0" = _foreach_mul_1[8] + getitem_601: "f32[768][1]cuda:0" = _foreach_mul_1[9] + getitem_602: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[10] + getitem_603: "f32[3072][1]cuda:0" = _foreach_mul_1[11] + getitem_604: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[12] + getitem_605: "f32[768][1]cuda:0" = _foreach_mul_1[13] + getitem_606: "f32[768][1]cuda:0" = _foreach_mul_1[14] + getitem_607: "f32[768][1]cuda:0" = _foreach_mul_1[15] + getitem_608: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[16] + getitem_609: "f32[2304][1]cuda:0" = _foreach_mul_1[17] + getitem_610: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[18] + getitem_611: "f32[768][1]cuda:0" = _foreach_mul_1[19] + getitem_612: "f32[768][1]cuda:0" = _foreach_mul_1[20] + getitem_613: "f32[768][1]cuda:0" = _foreach_mul_1[21] + getitem_614: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[22] + getitem_615: "f32[3072][1]cuda:0" = _foreach_mul_1[23] + getitem_616: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[24] + getitem_617: "f32[768][1]cuda:0" = _foreach_mul_1[25] + getitem_618: "f32[768][1]cuda:0" = _foreach_mul_1[26] + getitem_619: "f32[768][1]cuda:0" = _foreach_mul_1[27] + getitem_620: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[28] + getitem_621: "f32[2304][1]cuda:0" = _foreach_mul_1[29] + getitem_622: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[30] + getitem_623: "f32[768][1]cuda:0" = _foreach_mul_1[31] + getitem_624: "f32[768][1]cuda:0" = _foreach_mul_1[32] + getitem_625: "f32[768][1]cuda:0" = _foreach_mul_1[33] 
+ getitem_626: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[34] + getitem_627: "f32[3072][1]cuda:0" = _foreach_mul_1[35] + getitem_628: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[36] + getitem_629: "f32[768][1]cuda:0" = _foreach_mul_1[37] + getitem_630: "f32[768][1]cuda:0" = _foreach_mul_1[38] + getitem_631: "f32[768][1]cuda:0" = _foreach_mul_1[39] + getitem_632: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[40] + getitem_633: "f32[2304][1]cuda:0" = _foreach_mul_1[41] + getitem_634: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[42] + getitem_635: "f32[768][1]cuda:0" = _foreach_mul_1[43] + getitem_636: "f32[768][1]cuda:0" = _foreach_mul_1[44] + getitem_637: "f32[768][1]cuda:0" = _foreach_mul_1[45] + getitem_638: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[46] + getitem_639: "f32[3072][1]cuda:0" = _foreach_mul_1[47] + getitem_640: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[48] + getitem_641: "f32[768][1]cuda:0" = _foreach_mul_1[49] + getitem_642: "f32[768][1]cuda:0" = _foreach_mul_1[50] + getitem_643: "f32[768][1]cuda:0" = _foreach_mul_1[51] + getitem_644: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[52] + getitem_645: "f32[2304][1]cuda:0" = _foreach_mul_1[53] + getitem_646: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[54] + getitem_647: "f32[768][1]cuda:0" = _foreach_mul_1[55] + getitem_648: "f32[768][1]cuda:0" = _foreach_mul_1[56] + getitem_649: "f32[768][1]cuda:0" = _foreach_mul_1[57] + getitem_650: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[58] + getitem_651: "f32[3072][1]cuda:0" = _foreach_mul_1[59] + getitem_652: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[60] + getitem_653: "f32[768][1]cuda:0" = _foreach_mul_1[61] + getitem_654: "f32[768][1]cuda:0" = _foreach_mul_1[62] + getitem_655: "f32[768][1]cuda:0" = _foreach_mul_1[63] + getitem_656: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[64] + getitem_657: "f32[2304][1]cuda:0" = _foreach_mul_1[65] + getitem_658: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[66] + getitem_659: "f32[768][1]cuda:0" = _foreach_mul_1[67] + getitem_660: "f32[768][1]cuda:0" = _foreach_mul_1[68] + getitem_661: "f32[768][1]cuda:0" = _foreach_mul_1[69] + getitem_662: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[70] + getitem_663: "f32[3072][1]cuda:0" = _foreach_mul_1[71] + getitem_664: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[72] + getitem_665: "f32[768][1]cuda:0" = _foreach_mul_1[73] + getitem_666: "f32[768][1]cuda:0" = _foreach_mul_1[74] + getitem_667: "f32[768][1]cuda:0" = _foreach_mul_1[75] + getitem_668: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[76] + getitem_669: "f32[2304][1]cuda:0" = _foreach_mul_1[77] + getitem_670: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[78] + getitem_671: "f32[768][1]cuda:0" = _foreach_mul_1[79] + getitem_672: "f32[768][1]cuda:0" = _foreach_mul_1[80] + getitem_673: "f32[768][1]cuda:0" = _foreach_mul_1[81] + getitem_674: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[82] + getitem_675: "f32[3072][1]cuda:0" = _foreach_mul_1[83] + getitem_676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[84] + getitem_677: "f32[768][1]cuda:0" = _foreach_mul_1[85] + getitem_678: "f32[768][1]cuda:0" = _foreach_mul_1[86] + getitem_679: "f32[768][1]cuda:0" = _foreach_mul_1[87] + getitem_680: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[88] + getitem_681: "f32[2304][1]cuda:0" = _foreach_mul_1[89] + getitem_682: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[90] + getitem_683: "f32[768][1]cuda:0" = _foreach_mul_1[91] + getitem_684: "f32[768][1]cuda:0" = _foreach_mul_1[92] + getitem_685: 
"f32[768][1]cuda:0" = _foreach_mul_1[93] + getitem_686: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[94] + getitem_687: "f32[3072][1]cuda:0" = _foreach_mul_1[95] + getitem_688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[96] + getitem_689: "f32[768][1]cuda:0" = _foreach_mul_1[97] + getitem_690: "f32[768][1]cuda:0" = _foreach_mul_1[98] + getitem_691: "f32[768][1]cuda:0" = _foreach_mul_1[99] + getitem_692: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[100] + getitem_693: "f32[2304][1]cuda:0" = _foreach_mul_1[101] + getitem_694: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[102] + getitem_695: "f32[768][1]cuda:0" = _foreach_mul_1[103] + getitem_696: "f32[768][1]cuda:0" = _foreach_mul_1[104] + getitem_697: "f32[768][1]cuda:0" = _foreach_mul_1[105] + getitem_698: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[106] + getitem_699: "f32[3072][1]cuda:0" = _foreach_mul_1[107] + getitem_700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[108] + getitem_701: "f32[768][1]cuda:0" = _foreach_mul_1[109] + getitem_702: "f32[768][1]cuda:0" = _foreach_mul_1[110] + getitem_703: "f32[768][1]cuda:0" = _foreach_mul_1[111] + getitem_704: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[112] + getitem_705: "f32[2304][1]cuda:0" = _foreach_mul_1[113] + getitem_706: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[114] + getitem_707: "f32[768][1]cuda:0" = _foreach_mul_1[115] + getitem_708: "f32[768][1]cuda:0" = _foreach_mul_1[116] + getitem_709: "f32[768][1]cuda:0" = _foreach_mul_1[117] + getitem_710: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[118] + getitem_711: "f32[3072][1]cuda:0" = _foreach_mul_1[119] + getitem_712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[120] + getitem_713: "f32[768][1]cuda:0" = _foreach_mul_1[121] + getitem_714: "f32[768][1]cuda:0" = _foreach_mul_1[122] + getitem_715: "f32[768][1]cuda:0" = _foreach_mul_1[123] + getitem_716: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[124] + getitem_717: "f32[2304][1]cuda:0" = _foreach_mul_1[125] + getitem_718: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[126] + getitem_719: "f32[768][1]cuda:0" = _foreach_mul_1[127] + getitem_720: "f32[768][1]cuda:0" = _foreach_mul_1[128] + getitem_721: "f32[768][1]cuda:0" = _foreach_mul_1[129] + getitem_722: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[130] + getitem_723: "f32[3072][1]cuda:0" = _foreach_mul_1[131] + getitem_724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[132] + getitem_725: "f32[768][1]cuda:0" = _foreach_mul_1[133] + getitem_726: "f32[768][1]cuda:0" = _foreach_mul_1[134] + getitem_727: "f32[768][1]cuda:0" = _foreach_mul_1[135] + getitem_728: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_1[136] + getitem_729: "f32[2304][1]cuda:0" = _foreach_mul_1[137] + getitem_730: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_1[138] + getitem_731: "f32[768][1]cuda:0" = _foreach_mul_1[139] + getitem_732: "f32[768][1]cuda:0" = _foreach_mul_1[140] + getitem_733: "f32[768][1]cuda:0" = _foreach_mul_1[141] + getitem_734: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_1[142] + getitem_735: "f32[3072][1]cuda:0" = _foreach_mul_1[143] + getitem_736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_1[144] + getitem_737: "f32[768][1]cuda:0" = _foreach_mul_1[145] + getitem_738: "f32[768][1]cuda:0" = _foreach_mul_1[146] + getitem_739: "f32[768][1]cuda:0" = _foreach_mul_1[147]; _foreach_mul_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, 
arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = 
arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None + getitem_740: "f32[50304, 768][768, 1]cuda:0" = _foreach_mul_2[0] + getitem_741: "f32[1024, 768][768, 1]cuda:0" = _foreach_mul_2[1] + getitem_742: "f32[768][1]cuda:0" = _foreach_mul_2[2] + getitem_743: "f32[768][1]cuda:0" = _foreach_mul_2[3] + getitem_744: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[4] + getitem_745: "f32[2304][1]cuda:0" = _foreach_mul_2[5] + getitem_746: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[6] + getitem_747: "f32[768][1]cuda:0" = _foreach_mul_2[7] + getitem_748: "f32[768][1]cuda:0" = _foreach_mul_2[8] + getitem_749: "f32[768][1]cuda:0" = _foreach_mul_2[9] + getitem_750: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[10] + getitem_751: "f32[3072][1]cuda:0" = _foreach_mul_2[11] + getitem_752: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[12] + getitem_753: "f32[768][1]cuda:0" = _foreach_mul_2[13] + getitem_754: "f32[768][1]cuda:0" = _foreach_mul_2[14] + getitem_755: "f32[768][1]cuda:0" = _foreach_mul_2[15] + getitem_756: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[16] + getitem_757: "f32[2304][1]cuda:0" = _foreach_mul_2[17] + getitem_758: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[18] + getitem_759: "f32[768][1]cuda:0" = _foreach_mul_2[19] + getitem_760: "f32[768][1]cuda:0" = _foreach_mul_2[20] + getitem_761: "f32[768][1]cuda:0" = _foreach_mul_2[21] + getitem_762: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[22] + getitem_763: "f32[3072][1]cuda:0" = _foreach_mul_2[23] + getitem_764: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[24] + getitem_765: "f32[768][1]cuda:0" = _foreach_mul_2[25] + getitem_766: "f32[768][1]cuda:0" = _foreach_mul_2[26] + getitem_767: "f32[768][1]cuda:0" = _foreach_mul_2[27] + getitem_768: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[28] + getitem_769: "f32[2304][1]cuda:0" = _foreach_mul_2[29] + getitem_770: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[30] + getitem_771: "f32[768][1]cuda:0" = _foreach_mul_2[31] + getitem_772: "f32[768][1]cuda:0" = _foreach_mul_2[32] + getitem_773: "f32[768][1]cuda:0" = _foreach_mul_2[33] + getitem_774: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[34] + getitem_775: "f32[3072][1]cuda:0" = _foreach_mul_2[35] + getitem_776: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[36] + getitem_777: "f32[768][1]cuda:0" = _foreach_mul_2[37] + getitem_778: "f32[768][1]cuda:0" = _foreach_mul_2[38] + getitem_779: "f32[768][1]cuda:0" = _foreach_mul_2[39] + getitem_780: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[40] + getitem_781: "f32[2304][1]cuda:0" = _foreach_mul_2[41] + getitem_782: "f32[768, 
768][768, 1]cuda:0" = _foreach_mul_2[42] + getitem_783: "f32[768][1]cuda:0" = _foreach_mul_2[43] + getitem_784: "f32[768][1]cuda:0" = _foreach_mul_2[44] + getitem_785: "f32[768][1]cuda:0" = _foreach_mul_2[45] + getitem_786: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[46] + getitem_787: "f32[3072][1]cuda:0" = _foreach_mul_2[47] + getitem_788: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[48] + getitem_789: "f32[768][1]cuda:0" = _foreach_mul_2[49] + getitem_790: "f32[768][1]cuda:0" = _foreach_mul_2[50] + getitem_791: "f32[768][1]cuda:0" = _foreach_mul_2[51] + getitem_792: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[52] + getitem_793: "f32[2304][1]cuda:0" = _foreach_mul_2[53] + getitem_794: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[54] + getitem_795: "f32[768][1]cuda:0" = _foreach_mul_2[55] + getitem_796: "f32[768][1]cuda:0" = _foreach_mul_2[56] + getitem_797: "f32[768][1]cuda:0" = _foreach_mul_2[57] + getitem_798: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[58] + getitem_799: "f32[3072][1]cuda:0" = _foreach_mul_2[59] + getitem_800: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[60] + getitem_801: "f32[768][1]cuda:0" = _foreach_mul_2[61] + getitem_802: "f32[768][1]cuda:0" = _foreach_mul_2[62] + getitem_803: "f32[768][1]cuda:0" = _foreach_mul_2[63] + getitem_804: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[64] + getitem_805: "f32[2304][1]cuda:0" = _foreach_mul_2[65] + getitem_806: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[66] + getitem_807: "f32[768][1]cuda:0" = _foreach_mul_2[67] + getitem_808: "f32[768][1]cuda:0" = _foreach_mul_2[68] + getitem_809: "f32[768][1]cuda:0" = _foreach_mul_2[69] + getitem_810: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[70] + getitem_811: "f32[3072][1]cuda:0" = _foreach_mul_2[71] + getitem_812: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[72] + getitem_813: "f32[768][1]cuda:0" = _foreach_mul_2[73] + getitem_814: "f32[768][1]cuda:0" = _foreach_mul_2[74] + getitem_815: "f32[768][1]cuda:0" = _foreach_mul_2[75] + getitem_816: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[76] + getitem_817: "f32[2304][1]cuda:0" = _foreach_mul_2[77] + getitem_818: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[78] + getitem_819: "f32[768][1]cuda:0" = _foreach_mul_2[79] + getitem_820: "f32[768][1]cuda:0" = _foreach_mul_2[80] + getitem_821: "f32[768][1]cuda:0" = _foreach_mul_2[81] + getitem_822: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[82] + getitem_823: "f32[3072][1]cuda:0" = _foreach_mul_2[83] + getitem_824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[84] + getitem_825: "f32[768][1]cuda:0" = _foreach_mul_2[85] + getitem_826: "f32[768][1]cuda:0" = _foreach_mul_2[86] + getitem_827: "f32[768][1]cuda:0" = _foreach_mul_2[87] + getitem_828: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[88] + getitem_829: "f32[2304][1]cuda:0" = _foreach_mul_2[89] + getitem_830: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[90] + getitem_831: "f32[768][1]cuda:0" = _foreach_mul_2[91] + getitem_832: "f32[768][1]cuda:0" = _foreach_mul_2[92] + getitem_833: "f32[768][1]cuda:0" = _foreach_mul_2[93] + getitem_834: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[94] + getitem_835: "f32[3072][1]cuda:0" = _foreach_mul_2[95] + getitem_836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[96] + getitem_837: "f32[768][1]cuda:0" = _foreach_mul_2[97] + getitem_838: "f32[768][1]cuda:0" = _foreach_mul_2[98] + getitem_839: "f32[768][1]cuda:0" = _foreach_mul_2[99] + getitem_840: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[100] + getitem_841: "f32[2304][1]cuda:0" = 
_foreach_mul_2[101] + getitem_842: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[102] + getitem_843: "f32[768][1]cuda:0" = _foreach_mul_2[103] + getitem_844: "f32[768][1]cuda:0" = _foreach_mul_2[104] + getitem_845: "f32[768][1]cuda:0" = _foreach_mul_2[105] + getitem_846: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[106] + getitem_847: "f32[3072][1]cuda:0" = _foreach_mul_2[107] + getitem_848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[108] + getitem_849: "f32[768][1]cuda:0" = _foreach_mul_2[109] + getitem_850: "f32[768][1]cuda:0" = _foreach_mul_2[110] + getitem_851: "f32[768][1]cuda:0" = _foreach_mul_2[111] + getitem_852: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[112] + getitem_853: "f32[2304][1]cuda:0" = _foreach_mul_2[113] + getitem_854: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[114] + getitem_855: "f32[768][1]cuda:0" = _foreach_mul_2[115] + getitem_856: "f32[768][1]cuda:0" = _foreach_mul_2[116] + getitem_857: "f32[768][1]cuda:0" = _foreach_mul_2[117] + getitem_858: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[118] + getitem_859: "f32[3072][1]cuda:0" = _foreach_mul_2[119] + getitem_860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[120] + getitem_861: "f32[768][1]cuda:0" = _foreach_mul_2[121] + getitem_862: "f32[768][1]cuda:0" = _foreach_mul_2[122] + getitem_863: "f32[768][1]cuda:0" = _foreach_mul_2[123] + getitem_864: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[124] + getitem_865: "f32[2304][1]cuda:0" = _foreach_mul_2[125] + getitem_866: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[126] + getitem_867: "f32[768][1]cuda:0" = _foreach_mul_2[127] + getitem_868: "f32[768][1]cuda:0" = _foreach_mul_2[128] + getitem_869: "f32[768][1]cuda:0" = _foreach_mul_2[129] + getitem_870: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[130] + getitem_871: "f32[3072][1]cuda:0" = _foreach_mul_2[131] + getitem_872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[132] + getitem_873: "f32[768][1]cuda:0" = _foreach_mul_2[133] + getitem_874: "f32[768][1]cuda:0" = _foreach_mul_2[134] + getitem_875: "f32[768][1]cuda:0" = _foreach_mul_2[135] + getitem_876: "f32[2304, 768][768, 1]cuda:0" = _foreach_mul_2[136] + getitem_877: "f32[2304][1]cuda:0" = _foreach_mul_2[137] + getitem_878: "f32[768, 768][768, 1]cuda:0" = _foreach_mul_2[138] + getitem_879: "f32[768][1]cuda:0" = _foreach_mul_2[139] + getitem_880: "f32[768][1]cuda:0" = _foreach_mul_2[140] + getitem_881: "f32[768][1]cuda:0" = _foreach_mul_2[141] + getitem_882: "f32[3072, 768][768, 1]cuda:0" = _foreach_mul_2[142] + getitem_883: "f32[3072][1]cuda:0" = _foreach_mul_2[143] + getitem_884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_mul_2[144] + getitem_885: "f32[768][1]cuda:0" = _foreach_mul_2[145] + getitem_886: "f32[768][1]cuda:0" = _foreach_mul_2[146] + getitem_887: "f32[768][1]cuda:0" = _foreach_mul_2[147]; _foreach_mul_2 = None + _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, 
getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 
= getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = 
getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None + getitem_888: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_2[0] + getitem_889: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_2[1] + getitem_890: "f32[768][1]cuda:0" = _foreach_add_2[2] + getitem_891: "f32[768][1]cuda:0" = _foreach_add_2[3] + getitem_892: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[4] + getitem_893: "f32[2304][1]cuda:0" = _foreach_add_2[5] + getitem_894: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[6] + getitem_895: "f32[768][1]cuda:0" = _foreach_add_2[7] + getitem_896: "f32[768][1]cuda:0" = _foreach_add_2[8] + getitem_897: "f32[768][1]cuda:0" = _foreach_add_2[9] + getitem_898: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[10] + getitem_899: "f32[3072][1]cuda:0" = _foreach_add_2[11] + getitem_900: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[12] + getitem_901: "f32[768][1]cuda:0" = _foreach_add_2[13] + getitem_902: "f32[768][1]cuda:0" = _foreach_add_2[14] + getitem_903: "f32[768][1]cuda:0" = _foreach_add_2[15] + getitem_904: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[16] + getitem_905: "f32[2304][1]cuda:0" = _foreach_add_2[17] + getitem_906: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[18] + getitem_907: "f32[768][1]cuda:0" = _foreach_add_2[19] + getitem_908: "f32[768][1]cuda:0" = _foreach_add_2[20] + getitem_909: "f32[768][1]cuda:0" = _foreach_add_2[21] + getitem_910: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[22] + getitem_911: "f32[3072][1]cuda:0" = _foreach_add_2[23] + getitem_912: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[24] + getitem_913: "f32[768][1]cuda:0" = _foreach_add_2[25] + getitem_914: "f32[768][1]cuda:0" = _foreach_add_2[26] + getitem_915: "f32[768][1]cuda:0" = _foreach_add_2[27] + getitem_916: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[28] + getitem_917: "f32[2304][1]cuda:0" = _foreach_add_2[29] + getitem_918: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[30] + getitem_919: "f32[768][1]cuda:0" = _foreach_add_2[31] + getitem_920: "f32[768][1]cuda:0" = _foreach_add_2[32] + getitem_921: "f32[768][1]cuda:0" = _foreach_add_2[33] + getitem_922: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[34] + getitem_923: "f32[3072][1]cuda:0" = _foreach_add_2[35] + getitem_924: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[36] + getitem_925: "f32[768][1]cuda:0" = _foreach_add_2[37] + getitem_926: "f32[768][1]cuda:0" = _foreach_add_2[38] + getitem_927: "f32[768][1]cuda:0" = _foreach_add_2[39] + getitem_928: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[40] + getitem_929: "f32[2304][1]cuda:0" = _foreach_add_2[41] + getitem_930: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[42] + getitem_931: "f32[768][1]cuda:0" = _foreach_add_2[43] + getitem_932: "f32[768][1]cuda:0" = _foreach_add_2[44] + getitem_933: "f32[768][1]cuda:0" = _foreach_add_2[45] + getitem_934: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[46] + getitem_935: "f32[3072][1]cuda:0" = _foreach_add_2[47] + getitem_936: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[48] + getitem_937: "f32[768][1]cuda:0" = _foreach_add_2[49] + getitem_938: "f32[768][1]cuda:0" = _foreach_add_2[50] + getitem_939: "f32[768][1]cuda:0" = _foreach_add_2[51] + getitem_940: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[52] + getitem_941: "f32[2304][1]cuda:0" = _foreach_add_2[53] + getitem_942: "f32[768, 768][768, 1]cuda:0" = 
_foreach_add_2[54] + getitem_943: "f32[768][1]cuda:0" = _foreach_add_2[55] + getitem_944: "f32[768][1]cuda:0" = _foreach_add_2[56] + getitem_945: "f32[768][1]cuda:0" = _foreach_add_2[57] + getitem_946: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[58] + getitem_947: "f32[3072][1]cuda:0" = _foreach_add_2[59] + getitem_948: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[60] + getitem_949: "f32[768][1]cuda:0" = _foreach_add_2[61] + getitem_950: "f32[768][1]cuda:0" = _foreach_add_2[62] + getitem_951: "f32[768][1]cuda:0" = _foreach_add_2[63] + getitem_952: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[64] + getitem_953: "f32[2304][1]cuda:0" = _foreach_add_2[65] + getitem_954: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[66] + getitem_955: "f32[768][1]cuda:0" = _foreach_add_2[67] + getitem_956: "f32[768][1]cuda:0" = _foreach_add_2[68] + getitem_957: "f32[768][1]cuda:0" = _foreach_add_2[69] + getitem_958: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[70] + getitem_959: "f32[3072][1]cuda:0" = _foreach_add_2[71] + getitem_960: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[72] + getitem_961: "f32[768][1]cuda:0" = _foreach_add_2[73] + getitem_962: "f32[768][1]cuda:0" = _foreach_add_2[74] + getitem_963: "f32[768][1]cuda:0" = _foreach_add_2[75] + getitem_964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[76] + getitem_965: "f32[2304][1]cuda:0" = _foreach_add_2[77] + getitem_966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[78] + getitem_967: "f32[768][1]cuda:0" = _foreach_add_2[79] + getitem_968: "f32[768][1]cuda:0" = _foreach_add_2[80] + getitem_969: "f32[768][1]cuda:0" = _foreach_add_2[81] + getitem_970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[82] + getitem_971: "f32[3072][1]cuda:0" = _foreach_add_2[83] + getitem_972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[84] + getitem_973: "f32[768][1]cuda:0" = _foreach_add_2[85] + getitem_974: "f32[768][1]cuda:0" = _foreach_add_2[86] + getitem_975: "f32[768][1]cuda:0" = _foreach_add_2[87] + getitem_976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[88] + getitem_977: "f32[2304][1]cuda:0" = _foreach_add_2[89] + getitem_978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[90] + getitem_979: "f32[768][1]cuda:0" = _foreach_add_2[91] + getitem_980: "f32[768][1]cuda:0" = _foreach_add_2[92] + getitem_981: "f32[768][1]cuda:0" = _foreach_add_2[93] + getitem_982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[94] + getitem_983: "f32[3072][1]cuda:0" = _foreach_add_2[95] + getitem_984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[96] + getitem_985: "f32[768][1]cuda:0" = _foreach_add_2[97] + getitem_986: "f32[768][1]cuda:0" = _foreach_add_2[98] + getitem_987: "f32[768][1]cuda:0" = _foreach_add_2[99] + getitem_988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[100] + getitem_989: "f32[2304][1]cuda:0" = _foreach_add_2[101] + getitem_990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[102] + getitem_991: "f32[768][1]cuda:0" = _foreach_add_2[103] + getitem_992: "f32[768][1]cuda:0" = _foreach_add_2[104] + getitem_993: "f32[768][1]cuda:0" = _foreach_add_2[105] + getitem_994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[106] + getitem_995: "f32[3072][1]cuda:0" = _foreach_add_2[107] + getitem_996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[108] + getitem_997: "f32[768][1]cuda:0" = _foreach_add_2[109] + getitem_998: "f32[768][1]cuda:0" = _foreach_add_2[110] + getitem_999: "f32[768][1]cuda:0" = _foreach_add_2[111] + getitem_1000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[112] + getitem_1001: "f32[2304][1]cuda:0" = 
_foreach_add_2[113] + getitem_1002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[114] + getitem_1003: "f32[768][1]cuda:0" = _foreach_add_2[115] + getitem_1004: "f32[768][1]cuda:0" = _foreach_add_2[116] + getitem_1005: "f32[768][1]cuda:0" = _foreach_add_2[117] + getitem_1006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[118] + getitem_1007: "f32[3072][1]cuda:0" = _foreach_add_2[119] + getitem_1008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[120] + getitem_1009: "f32[768][1]cuda:0" = _foreach_add_2[121] + getitem_1010: "f32[768][1]cuda:0" = _foreach_add_2[122] + getitem_1011: "f32[768][1]cuda:0" = _foreach_add_2[123] + getitem_1012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[124] + getitem_1013: "f32[2304][1]cuda:0" = _foreach_add_2[125] + getitem_1014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[126] + getitem_1015: "f32[768][1]cuda:0" = _foreach_add_2[127] + getitem_1016: "f32[768][1]cuda:0" = _foreach_add_2[128] + getitem_1017: "f32[768][1]cuda:0" = _foreach_add_2[129] + getitem_1018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[130] + getitem_1019: "f32[3072][1]cuda:0" = _foreach_add_2[131] + getitem_1020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[132] + getitem_1021: "f32[768][1]cuda:0" = _foreach_add_2[133] + getitem_1022: "f32[768][1]cuda:0" = _foreach_add_2[134] + getitem_1023: "f32[768][1]cuda:0" = _foreach_add_2[135] + getitem_1024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_2[136] + getitem_1025: "f32[2304][1]cuda:0" = _foreach_add_2[137] + getitem_1026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_2[138] + getitem_1027: "f32[768][1]cuda:0" = _foreach_add_2[139] + getitem_1028: "f32[768][1]cuda:0" = _foreach_add_2[140] + getitem_1029: "f32[768][1]cuda:0" = _foreach_add_2[141] + getitem_1030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_2[142] + getitem_1031: "f32[3072][1]cuda:0" = _foreach_add_2[143] + getitem_1032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_2[144] + getitem_1033: "f32[768][1]cuda:0" = _foreach_add_2[145] + getitem_1034: "f32[768][1]cuda:0" = _foreach_add_2[146] + getitem_1035: "f32[768][1]cuda:0" = _foreach_add_2[147]; _foreach_add_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:555 in _multi_tensor_adam, code: bias_correction1 = torch._foreach_pow(beta1, device_state_steps) + _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, 
getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]) + getitem_1036: "f32[][]cuda:0" = _foreach_pow[0] + getitem_1037: "f32[][]cuda:0" = _foreach_pow[1] + getitem_1038: "f32[][]cuda:0" = _foreach_pow[2] + getitem_1039: "f32[][]cuda:0" = _foreach_pow[3] + getitem_1040: "f32[][]cuda:0" = _foreach_pow[4] + getitem_1041: "f32[][]cuda:0" = _foreach_pow[5] + getitem_1042: "f32[][]cuda:0" = _foreach_pow[6] + getitem_1043: "f32[][]cuda:0" = _foreach_pow[7] + getitem_1044: "f32[][]cuda:0" = _foreach_pow[8] + getitem_1045: "f32[][]cuda:0" = _foreach_pow[9] + getitem_1046: "f32[][]cuda:0" = _foreach_pow[10] + getitem_1047: "f32[][]cuda:0" = _foreach_pow[11] + getitem_1048: "f32[][]cuda:0" = _foreach_pow[12] + getitem_1049: "f32[][]cuda:0" = _foreach_pow[13] + getitem_1050: "f32[][]cuda:0" = _foreach_pow[14] + getitem_1051: "f32[][]cuda:0" = _foreach_pow[15] + getitem_1052: "f32[][]cuda:0" = _foreach_pow[16] + getitem_1053: "f32[][]cuda:0" = _foreach_pow[17] + getitem_1054: "f32[][]cuda:0" = _foreach_pow[18] + getitem_1055: "f32[][]cuda:0" = _foreach_pow[19] + getitem_1056: "f32[][]cuda:0" = _foreach_pow[20] + getitem_1057: "f32[][]cuda:0" = _foreach_pow[21] + getitem_1058: "f32[][]cuda:0" = _foreach_pow[22] + getitem_1059: "f32[][]cuda:0" = _foreach_pow[23] + getitem_1060: "f32[][]cuda:0" = _foreach_pow[24] + getitem_1061: "f32[][]cuda:0" = _foreach_pow[25] + getitem_1062: "f32[][]cuda:0" = _foreach_pow[26] + getitem_1063: "f32[][]cuda:0" = _foreach_pow[27] + getitem_1064: "f32[][]cuda:0" = _foreach_pow[28] + getitem_1065: "f32[][]cuda:0" = _foreach_pow[29] + getitem_1066: "f32[][]cuda:0" = _foreach_pow[30] + getitem_1067: "f32[][]cuda:0" = _foreach_pow[31] + getitem_1068: "f32[][]cuda:0" = _foreach_pow[32] + getitem_1069: "f32[][]cuda:0" = _foreach_pow[33] + getitem_1070: "f32[][]cuda:0" = _foreach_pow[34] + getitem_1071: "f32[][]cuda:0" = _foreach_pow[35] + getitem_1072: "f32[][]cuda:0" = _foreach_pow[36] + getitem_1073: "f32[][]cuda:0" = _foreach_pow[37] + getitem_1074: "f32[][]cuda:0" = _foreach_pow[38] + getitem_1075: "f32[][]cuda:0" = _foreach_pow[39] + getitem_1076: "f32[][]cuda:0" = _foreach_pow[40] + getitem_1077: "f32[][]cuda:0" = _foreach_pow[41] + getitem_1078: "f32[][]cuda:0" = _foreach_pow[42] + getitem_1079: "f32[][]cuda:0" = _foreach_pow[43] + getitem_1080: "f32[][]cuda:0" = _foreach_pow[44] + getitem_1081: "f32[][]cuda:0" = _foreach_pow[45] + getitem_1082: "f32[][]cuda:0" = _foreach_pow[46] + getitem_1083: "f32[][]cuda:0" = _foreach_pow[47] + getitem_1084: "f32[][]cuda:0" = _foreach_pow[48] + getitem_1085: "f32[][]cuda:0" = _foreach_pow[49] + getitem_1086: "f32[][]cuda:0" = _foreach_pow[50] + getitem_1087: "f32[][]cuda:0" = _foreach_pow[51] + getitem_1088: "f32[][]cuda:0" = _foreach_pow[52] + getitem_1089: "f32[][]cuda:0" = _foreach_pow[53] + getitem_1090: "f32[][]cuda:0" = _foreach_pow[54] + getitem_1091: "f32[][]cuda:0" = _foreach_pow[55] + getitem_1092: "f32[][]cuda:0" = _foreach_pow[56] + getitem_1093: "f32[][]cuda:0" 
= _foreach_pow[57] + getitem_1094: "f32[][]cuda:0" = _foreach_pow[58] + getitem_1095: "f32[][]cuda:0" = _foreach_pow[59] + getitem_1096: "f32[][]cuda:0" = _foreach_pow[60] + getitem_1097: "f32[][]cuda:0" = _foreach_pow[61] + getitem_1098: "f32[][]cuda:0" = _foreach_pow[62] + getitem_1099: "f32[][]cuda:0" = _foreach_pow[63] + getitem_1100: "f32[][]cuda:0" = _foreach_pow[64] + getitem_1101: "f32[][]cuda:0" = _foreach_pow[65] + getitem_1102: "f32[][]cuda:0" = _foreach_pow[66] + getitem_1103: "f32[][]cuda:0" = _foreach_pow[67] + getitem_1104: "f32[][]cuda:0" = _foreach_pow[68] + getitem_1105: "f32[][]cuda:0" = _foreach_pow[69] + getitem_1106: "f32[][]cuda:0" = _foreach_pow[70] + getitem_1107: "f32[][]cuda:0" = _foreach_pow[71] + getitem_1108: "f32[][]cuda:0" = _foreach_pow[72] + getitem_1109: "f32[][]cuda:0" = _foreach_pow[73] + getitem_1110: "f32[][]cuda:0" = _foreach_pow[74] + getitem_1111: "f32[][]cuda:0" = _foreach_pow[75] + getitem_1112: "f32[][]cuda:0" = _foreach_pow[76] + getitem_1113: "f32[][]cuda:0" = _foreach_pow[77] + getitem_1114: "f32[][]cuda:0" = _foreach_pow[78] + getitem_1115: "f32[][]cuda:0" = _foreach_pow[79] + getitem_1116: "f32[][]cuda:0" = _foreach_pow[80] + getitem_1117: "f32[][]cuda:0" = _foreach_pow[81] + getitem_1118: "f32[][]cuda:0" = _foreach_pow[82] + getitem_1119: "f32[][]cuda:0" = _foreach_pow[83] + getitem_1120: "f32[][]cuda:0" = _foreach_pow[84] + getitem_1121: "f32[][]cuda:0" = _foreach_pow[85] + getitem_1122: "f32[][]cuda:0" = _foreach_pow[86] + getitem_1123: "f32[][]cuda:0" = _foreach_pow[87] + getitem_1124: "f32[][]cuda:0" = _foreach_pow[88] + getitem_1125: "f32[][]cuda:0" = _foreach_pow[89] + getitem_1126: "f32[][]cuda:0" = _foreach_pow[90] + getitem_1127: "f32[][]cuda:0" = _foreach_pow[91] + getitem_1128: "f32[][]cuda:0" = _foreach_pow[92] + getitem_1129: "f32[][]cuda:0" = _foreach_pow[93] + getitem_1130: "f32[][]cuda:0" = _foreach_pow[94] + getitem_1131: "f32[][]cuda:0" = _foreach_pow[95] + getitem_1132: "f32[][]cuda:0" = _foreach_pow[96] + getitem_1133: "f32[][]cuda:0" = _foreach_pow[97] + getitem_1134: "f32[][]cuda:0" = _foreach_pow[98] + getitem_1135: "f32[][]cuda:0" = _foreach_pow[99] + getitem_1136: "f32[][]cuda:0" = _foreach_pow[100] + getitem_1137: "f32[][]cuda:0" = _foreach_pow[101] + getitem_1138: "f32[][]cuda:0" = _foreach_pow[102] + getitem_1139: "f32[][]cuda:0" = _foreach_pow[103] + getitem_1140: "f32[][]cuda:0" = _foreach_pow[104] + getitem_1141: "f32[][]cuda:0" = _foreach_pow[105] + getitem_1142: "f32[][]cuda:0" = _foreach_pow[106] + getitem_1143: "f32[][]cuda:0" = _foreach_pow[107] + getitem_1144: "f32[][]cuda:0" = _foreach_pow[108] + getitem_1145: "f32[][]cuda:0" = _foreach_pow[109] + getitem_1146: "f32[][]cuda:0" = _foreach_pow[110] + getitem_1147: "f32[][]cuda:0" = _foreach_pow[111] + getitem_1148: "f32[][]cuda:0" = _foreach_pow[112] + getitem_1149: "f32[][]cuda:0" = _foreach_pow[113] + getitem_1150: "f32[][]cuda:0" = _foreach_pow[114] + getitem_1151: "f32[][]cuda:0" = _foreach_pow[115] + getitem_1152: "f32[][]cuda:0" = _foreach_pow[116] + getitem_1153: "f32[][]cuda:0" = _foreach_pow[117] + getitem_1154: "f32[][]cuda:0" = _foreach_pow[118] + getitem_1155: "f32[][]cuda:0" = _foreach_pow[119] + getitem_1156: "f32[][]cuda:0" = _foreach_pow[120] + getitem_1157: "f32[][]cuda:0" = _foreach_pow[121] + getitem_1158: "f32[][]cuda:0" = _foreach_pow[122] + getitem_1159: "f32[][]cuda:0" = _foreach_pow[123] + getitem_1160: "f32[][]cuda:0" = _foreach_pow[124] + getitem_1161: "f32[][]cuda:0" = _foreach_pow[125] + getitem_1162: "f32[][]cuda:0" = 
_foreach_pow[126] + getitem_1163: "f32[][]cuda:0" = _foreach_pow[127] + getitem_1164: "f32[][]cuda:0" = _foreach_pow[128] + getitem_1165: "f32[][]cuda:0" = _foreach_pow[129] + getitem_1166: "f32[][]cuda:0" = _foreach_pow[130] + getitem_1167: "f32[][]cuda:0" = _foreach_pow[131] + getitem_1168: "f32[][]cuda:0" = _foreach_pow[132] + getitem_1169: "f32[][]cuda:0" = _foreach_pow[133] + getitem_1170: "f32[][]cuda:0" = _foreach_pow[134] + getitem_1171: "f32[][]cuda:0" = _foreach_pow[135] + getitem_1172: "f32[][]cuda:0" = _foreach_pow[136] + getitem_1173: "f32[][]cuda:0" = _foreach_pow[137] + getitem_1174: "f32[][]cuda:0" = _foreach_pow[138] + getitem_1175: "f32[][]cuda:0" = _foreach_pow[139] + getitem_1176: "f32[][]cuda:0" = _foreach_pow[140] + getitem_1177: "f32[][]cuda:0" = _foreach_pow[141] + getitem_1178: "f32[][]cuda:0" = _foreach_pow[142] + getitem_1179: "f32[][]cuda:0" = _foreach_pow[143] + getitem_1180: "f32[][]cuda:0" = _foreach_pow[144] + getitem_1181: "f32[][]cuda:0" = _foreach_pow[145] + getitem_1182: "f32[][]cuda:0" = _foreach_pow[146] + getitem_1183: "f32[][]cuda:0" = _foreach_pow[147]; _foreach_pow = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:556 in _multi_tensor_adam, code: bias_correction2 = torch._foreach_pow(beta2, device_state_steps) + _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147]); getitem = getitem_1 = getitem_2 = getitem_3 = getitem_4 = getitem_5 = getitem_6 = getitem_7 = getitem_8 = getitem_9 = getitem_10 = getitem_11 = getitem_12 = getitem_13 = getitem_14 = getitem_15 = getitem_16 = getitem_17 = getitem_18 = getitem_19 = getitem_20 = getitem_21 = getitem_22 = getitem_23 = getitem_24 = getitem_25 = getitem_26 = getitem_27 = getitem_28 = getitem_29 = 
getitem_30 = getitem_31 = getitem_32 = getitem_33 = getitem_34 = getitem_35 = getitem_36 = getitem_37 = getitem_38 = getitem_39 = getitem_40 = getitem_41 = getitem_42 = getitem_43 = getitem_44 = getitem_45 = getitem_46 = getitem_47 = getitem_48 = getitem_49 = getitem_50 = getitem_51 = getitem_52 = getitem_53 = getitem_54 = getitem_55 = getitem_56 = getitem_57 = getitem_58 = getitem_59 = getitem_60 = getitem_61 = getitem_62 = getitem_63 = getitem_64 = getitem_65 = getitem_66 = getitem_67 = getitem_68 = getitem_69 = getitem_70 = getitem_71 = getitem_72 = getitem_73 = getitem_74 = getitem_75 = getitem_76 = getitem_77 = getitem_78 = getitem_79 = getitem_80 = getitem_81 = getitem_82 = getitem_83 = getitem_84 = getitem_85 = getitem_86 = getitem_87 = getitem_88 = getitem_89 = getitem_90 = getitem_91 = getitem_92 = getitem_93 = getitem_94 = getitem_95 = getitem_96 = getitem_97 = getitem_98 = getitem_99 = getitem_100 = getitem_101 = getitem_102 = getitem_103 = getitem_104 = getitem_105 = getitem_106 = getitem_107 = getitem_108 = getitem_109 = getitem_110 = getitem_111 = getitem_112 = getitem_113 = getitem_114 = getitem_115 = getitem_116 = getitem_117 = getitem_118 = getitem_119 = getitem_120 = getitem_121 = getitem_122 = getitem_123 = getitem_124 = getitem_125 = getitem_126 = getitem_127 = getitem_128 = getitem_129 = getitem_130 = getitem_131 = getitem_132 = getitem_133 = getitem_134 = getitem_135 = getitem_136 = getitem_137 = getitem_138 = getitem_139 = getitem_140 = getitem_141 = getitem_142 = getitem_143 = getitem_144 = getitem_145 = getitem_146 = getitem_147 = None + getitem_1184: "f32[][]cuda:0" = _foreach_pow_1[0] + getitem_1185: "f32[][]cuda:0" = _foreach_pow_1[1] + getitem_1186: "f32[][]cuda:0" = _foreach_pow_1[2] + getitem_1187: "f32[][]cuda:0" = _foreach_pow_1[3] + getitem_1188: "f32[][]cuda:0" = _foreach_pow_1[4] + getitem_1189: "f32[][]cuda:0" = _foreach_pow_1[5] + getitem_1190: "f32[][]cuda:0" = _foreach_pow_1[6] + getitem_1191: "f32[][]cuda:0" = _foreach_pow_1[7] + getitem_1192: "f32[][]cuda:0" = _foreach_pow_1[8] + getitem_1193: "f32[][]cuda:0" = _foreach_pow_1[9] + getitem_1194: "f32[][]cuda:0" = _foreach_pow_1[10] + getitem_1195: "f32[][]cuda:0" = _foreach_pow_1[11] + getitem_1196: "f32[][]cuda:0" = _foreach_pow_1[12] + getitem_1197: "f32[][]cuda:0" = _foreach_pow_1[13] + getitem_1198: "f32[][]cuda:0" = _foreach_pow_1[14] + getitem_1199: "f32[][]cuda:0" = _foreach_pow_1[15] + getitem_1200: "f32[][]cuda:0" = _foreach_pow_1[16] + getitem_1201: "f32[][]cuda:0" = _foreach_pow_1[17] + getitem_1202: "f32[][]cuda:0" = _foreach_pow_1[18] + getitem_1203: "f32[][]cuda:0" = _foreach_pow_1[19] + getitem_1204: "f32[][]cuda:0" = _foreach_pow_1[20] + getitem_1205: "f32[][]cuda:0" = _foreach_pow_1[21] + getitem_1206: "f32[][]cuda:0" = _foreach_pow_1[22] + getitem_1207: "f32[][]cuda:0" = _foreach_pow_1[23] + getitem_1208: "f32[][]cuda:0" = _foreach_pow_1[24] + getitem_1209: "f32[][]cuda:0" = _foreach_pow_1[25] + getitem_1210: "f32[][]cuda:0" = _foreach_pow_1[26] + getitem_1211: "f32[][]cuda:0" = _foreach_pow_1[27] + getitem_1212: "f32[][]cuda:0" = _foreach_pow_1[28] + getitem_1213: "f32[][]cuda:0" = _foreach_pow_1[29] + getitem_1214: "f32[][]cuda:0" = _foreach_pow_1[30] + getitem_1215: "f32[][]cuda:0" = _foreach_pow_1[31] + getitem_1216: "f32[][]cuda:0" = _foreach_pow_1[32] + getitem_1217: "f32[][]cuda:0" = _foreach_pow_1[33] + getitem_1218: "f32[][]cuda:0" = _foreach_pow_1[34] + getitem_1219: "f32[][]cuda:0" = _foreach_pow_1[35] + getitem_1220: "f32[][]cuda:0" = _foreach_pow_1[36] + getitem_1221: 
"f32[][]cuda:0" = _foreach_pow_1[37] + getitem_1222: "f32[][]cuda:0" = _foreach_pow_1[38] + getitem_1223: "f32[][]cuda:0" = _foreach_pow_1[39] + getitem_1224: "f32[][]cuda:0" = _foreach_pow_1[40] + getitem_1225: "f32[][]cuda:0" = _foreach_pow_1[41] + getitem_1226: "f32[][]cuda:0" = _foreach_pow_1[42] + getitem_1227: "f32[][]cuda:0" = _foreach_pow_1[43] + getitem_1228: "f32[][]cuda:0" = _foreach_pow_1[44] + getitem_1229: "f32[][]cuda:0" = _foreach_pow_1[45] + getitem_1230: "f32[][]cuda:0" = _foreach_pow_1[46] + getitem_1231: "f32[][]cuda:0" = _foreach_pow_1[47] + getitem_1232: "f32[][]cuda:0" = _foreach_pow_1[48] + getitem_1233: "f32[][]cuda:0" = _foreach_pow_1[49] + getitem_1234: "f32[][]cuda:0" = _foreach_pow_1[50] + getitem_1235: "f32[][]cuda:0" = _foreach_pow_1[51] + getitem_1236: "f32[][]cuda:0" = _foreach_pow_1[52] + getitem_1237: "f32[][]cuda:0" = _foreach_pow_1[53] + getitem_1238: "f32[][]cuda:0" = _foreach_pow_1[54] + getitem_1239: "f32[][]cuda:0" = _foreach_pow_1[55] + getitem_1240: "f32[][]cuda:0" = _foreach_pow_1[56] + getitem_1241: "f32[][]cuda:0" = _foreach_pow_1[57] + getitem_1242: "f32[][]cuda:0" = _foreach_pow_1[58] + getitem_1243: "f32[][]cuda:0" = _foreach_pow_1[59] + getitem_1244: "f32[][]cuda:0" = _foreach_pow_1[60] + getitem_1245: "f32[][]cuda:0" = _foreach_pow_1[61] + getitem_1246: "f32[][]cuda:0" = _foreach_pow_1[62] + getitem_1247: "f32[][]cuda:0" = _foreach_pow_1[63] + getitem_1248: "f32[][]cuda:0" = _foreach_pow_1[64] + getitem_1249: "f32[][]cuda:0" = _foreach_pow_1[65] + getitem_1250: "f32[][]cuda:0" = _foreach_pow_1[66] + getitem_1251: "f32[][]cuda:0" = _foreach_pow_1[67] + getitem_1252: "f32[][]cuda:0" = _foreach_pow_1[68] + getitem_1253: "f32[][]cuda:0" = _foreach_pow_1[69] + getitem_1254: "f32[][]cuda:0" = _foreach_pow_1[70] + getitem_1255: "f32[][]cuda:0" = _foreach_pow_1[71] + getitem_1256: "f32[][]cuda:0" = _foreach_pow_1[72] + getitem_1257: "f32[][]cuda:0" = _foreach_pow_1[73] + getitem_1258: "f32[][]cuda:0" = _foreach_pow_1[74] + getitem_1259: "f32[][]cuda:0" = _foreach_pow_1[75] + getitem_1260: "f32[][]cuda:0" = _foreach_pow_1[76] + getitem_1261: "f32[][]cuda:0" = _foreach_pow_1[77] + getitem_1262: "f32[][]cuda:0" = _foreach_pow_1[78] + getitem_1263: "f32[][]cuda:0" = _foreach_pow_1[79] + getitem_1264: "f32[][]cuda:0" = _foreach_pow_1[80] + getitem_1265: "f32[][]cuda:0" = _foreach_pow_1[81] + getitem_1266: "f32[][]cuda:0" = _foreach_pow_1[82] + getitem_1267: "f32[][]cuda:0" = _foreach_pow_1[83] + getitem_1268: "f32[][]cuda:0" = _foreach_pow_1[84] + getitem_1269: "f32[][]cuda:0" = _foreach_pow_1[85] + getitem_1270: "f32[][]cuda:0" = _foreach_pow_1[86] + getitem_1271: "f32[][]cuda:0" = _foreach_pow_1[87] + getitem_1272: "f32[][]cuda:0" = _foreach_pow_1[88] + getitem_1273: "f32[][]cuda:0" = _foreach_pow_1[89] + getitem_1274: "f32[][]cuda:0" = _foreach_pow_1[90] + getitem_1275: "f32[][]cuda:0" = _foreach_pow_1[91] + getitem_1276: "f32[][]cuda:0" = _foreach_pow_1[92] + getitem_1277: "f32[][]cuda:0" = _foreach_pow_1[93] + getitem_1278: "f32[][]cuda:0" = _foreach_pow_1[94] + getitem_1279: "f32[][]cuda:0" = _foreach_pow_1[95] + getitem_1280: "f32[][]cuda:0" = _foreach_pow_1[96] + getitem_1281: "f32[][]cuda:0" = _foreach_pow_1[97] + getitem_1282: "f32[][]cuda:0" = _foreach_pow_1[98] + getitem_1283: "f32[][]cuda:0" = _foreach_pow_1[99] + getitem_1284: "f32[][]cuda:0" = _foreach_pow_1[100] + getitem_1285: "f32[][]cuda:0" = _foreach_pow_1[101] + getitem_1286: "f32[][]cuda:0" = _foreach_pow_1[102] + getitem_1287: "f32[][]cuda:0" = _foreach_pow_1[103] + getitem_1288: 
"f32[][]cuda:0" = _foreach_pow_1[104] + getitem_1289: "f32[][]cuda:0" = _foreach_pow_1[105] + getitem_1290: "f32[][]cuda:0" = _foreach_pow_1[106] + getitem_1291: "f32[][]cuda:0" = _foreach_pow_1[107] + getitem_1292: "f32[][]cuda:0" = _foreach_pow_1[108] + getitem_1293: "f32[][]cuda:0" = _foreach_pow_1[109] + getitem_1294: "f32[][]cuda:0" = _foreach_pow_1[110] + getitem_1295: "f32[][]cuda:0" = _foreach_pow_1[111] + getitem_1296: "f32[][]cuda:0" = _foreach_pow_1[112] + getitem_1297: "f32[][]cuda:0" = _foreach_pow_1[113] + getitem_1298: "f32[][]cuda:0" = _foreach_pow_1[114] + getitem_1299: "f32[][]cuda:0" = _foreach_pow_1[115] + getitem_1300: "f32[][]cuda:0" = _foreach_pow_1[116] + getitem_1301: "f32[][]cuda:0" = _foreach_pow_1[117] + getitem_1302: "f32[][]cuda:0" = _foreach_pow_1[118] + getitem_1303: "f32[][]cuda:0" = _foreach_pow_1[119] + getitem_1304: "f32[][]cuda:0" = _foreach_pow_1[120] + getitem_1305: "f32[][]cuda:0" = _foreach_pow_1[121] + getitem_1306: "f32[][]cuda:0" = _foreach_pow_1[122] + getitem_1307: "f32[][]cuda:0" = _foreach_pow_1[123] + getitem_1308: "f32[][]cuda:0" = _foreach_pow_1[124] + getitem_1309: "f32[][]cuda:0" = _foreach_pow_1[125] + getitem_1310: "f32[][]cuda:0" = _foreach_pow_1[126] + getitem_1311: "f32[][]cuda:0" = _foreach_pow_1[127] + getitem_1312: "f32[][]cuda:0" = _foreach_pow_1[128] + getitem_1313: "f32[][]cuda:0" = _foreach_pow_1[129] + getitem_1314: "f32[][]cuda:0" = _foreach_pow_1[130] + getitem_1315: "f32[][]cuda:0" = _foreach_pow_1[131] + getitem_1316: "f32[][]cuda:0" = _foreach_pow_1[132] + getitem_1317: "f32[][]cuda:0" = _foreach_pow_1[133] + getitem_1318: "f32[][]cuda:0" = _foreach_pow_1[134] + getitem_1319: "f32[][]cuda:0" = _foreach_pow_1[135] + getitem_1320: "f32[][]cuda:0" = _foreach_pow_1[136] + getitem_1321: "f32[][]cuda:0" = _foreach_pow_1[137] + getitem_1322: "f32[][]cuda:0" = _foreach_pow_1[138] + getitem_1323: "f32[][]cuda:0" = _foreach_pow_1[139] + getitem_1324: "f32[][]cuda:0" = _foreach_pow_1[140] + getitem_1325: "f32[][]cuda:0" = _foreach_pow_1[141] + getitem_1326: "f32[][]cuda:0" = _foreach_pow_1[142] + getitem_1327: "f32[][]cuda:0" = _foreach_pow_1[143] + getitem_1328: "f32[][]cuda:0" = _foreach_pow_1[144] + getitem_1329: "f32[][]cuda:0" = _foreach_pow_1[145] + getitem_1330: "f32[][]cuda:0" = _foreach_pow_1[146] + getitem_1331: "f32[][]cuda:0" = _foreach_pow_1[147]; _foreach_pow_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:558 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction1, 1) + _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, 
getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None + getitem_1332: "f32[][]cuda:0" = _foreach_sub_1[0] + getitem_1333: "f32[][]cuda:0" = _foreach_sub_1[1] + getitem_1334: "f32[][]cuda:0" = _foreach_sub_1[2] + getitem_1335: "f32[][]cuda:0" = _foreach_sub_1[3] + getitem_1336: 
"f32[][]cuda:0" = _foreach_sub_1[4] + getitem_1337: "f32[][]cuda:0" = _foreach_sub_1[5] + getitem_1338: "f32[][]cuda:0" = _foreach_sub_1[6] + getitem_1339: "f32[][]cuda:0" = _foreach_sub_1[7] + getitem_1340: "f32[][]cuda:0" = _foreach_sub_1[8] + getitem_1341: "f32[][]cuda:0" = _foreach_sub_1[9] + getitem_1342: "f32[][]cuda:0" = _foreach_sub_1[10] + getitem_1343: "f32[][]cuda:0" = _foreach_sub_1[11] + getitem_1344: "f32[][]cuda:0" = _foreach_sub_1[12] + getitem_1345: "f32[][]cuda:0" = _foreach_sub_1[13] + getitem_1346: "f32[][]cuda:0" = _foreach_sub_1[14] + getitem_1347: "f32[][]cuda:0" = _foreach_sub_1[15] + getitem_1348: "f32[][]cuda:0" = _foreach_sub_1[16] + getitem_1349: "f32[][]cuda:0" = _foreach_sub_1[17] + getitem_1350: "f32[][]cuda:0" = _foreach_sub_1[18] + getitem_1351: "f32[][]cuda:0" = _foreach_sub_1[19] + getitem_1352: "f32[][]cuda:0" = _foreach_sub_1[20] + getitem_1353: "f32[][]cuda:0" = _foreach_sub_1[21] + getitem_1354: "f32[][]cuda:0" = _foreach_sub_1[22] + getitem_1355: "f32[][]cuda:0" = _foreach_sub_1[23] + getitem_1356: "f32[][]cuda:0" = _foreach_sub_1[24] + getitem_1357: "f32[][]cuda:0" = _foreach_sub_1[25] + getitem_1358: "f32[][]cuda:0" = _foreach_sub_1[26] + getitem_1359: "f32[][]cuda:0" = _foreach_sub_1[27] + getitem_1360: "f32[][]cuda:0" = _foreach_sub_1[28] + getitem_1361: "f32[][]cuda:0" = _foreach_sub_1[29] + getitem_1362: "f32[][]cuda:0" = _foreach_sub_1[30] + getitem_1363: "f32[][]cuda:0" = _foreach_sub_1[31] + getitem_1364: "f32[][]cuda:0" = _foreach_sub_1[32] + getitem_1365: "f32[][]cuda:0" = _foreach_sub_1[33] + getitem_1366: "f32[][]cuda:0" = _foreach_sub_1[34] + getitem_1367: "f32[][]cuda:0" = _foreach_sub_1[35] + getitem_1368: "f32[][]cuda:0" = _foreach_sub_1[36] + getitem_1369: "f32[][]cuda:0" = _foreach_sub_1[37] + getitem_1370: "f32[][]cuda:0" = _foreach_sub_1[38] + getitem_1371: "f32[][]cuda:0" = _foreach_sub_1[39] + getitem_1372: "f32[][]cuda:0" = _foreach_sub_1[40] + getitem_1373: "f32[][]cuda:0" = _foreach_sub_1[41] + getitem_1374: "f32[][]cuda:0" = _foreach_sub_1[42] + getitem_1375: "f32[][]cuda:0" = _foreach_sub_1[43] + getitem_1376: "f32[][]cuda:0" = _foreach_sub_1[44] + getitem_1377: "f32[][]cuda:0" = _foreach_sub_1[45] + getitem_1378: "f32[][]cuda:0" = _foreach_sub_1[46] + getitem_1379: "f32[][]cuda:0" = _foreach_sub_1[47] + getitem_1380: "f32[][]cuda:0" = _foreach_sub_1[48] + getitem_1381: "f32[][]cuda:0" = _foreach_sub_1[49] + getitem_1382: "f32[][]cuda:0" = _foreach_sub_1[50] + getitem_1383: "f32[][]cuda:0" = _foreach_sub_1[51] + getitem_1384: "f32[][]cuda:0" = _foreach_sub_1[52] + getitem_1385: "f32[][]cuda:0" = _foreach_sub_1[53] + getitem_1386: "f32[][]cuda:0" = _foreach_sub_1[54] + getitem_1387: "f32[][]cuda:0" = _foreach_sub_1[55] + getitem_1388: "f32[][]cuda:0" = _foreach_sub_1[56] + getitem_1389: "f32[][]cuda:0" = _foreach_sub_1[57] + getitem_1390: "f32[][]cuda:0" = _foreach_sub_1[58] + getitem_1391: "f32[][]cuda:0" = _foreach_sub_1[59] + getitem_1392: "f32[][]cuda:0" = _foreach_sub_1[60] + getitem_1393: "f32[][]cuda:0" = _foreach_sub_1[61] + getitem_1394: "f32[][]cuda:0" = _foreach_sub_1[62] + getitem_1395: "f32[][]cuda:0" = _foreach_sub_1[63] + getitem_1396: "f32[][]cuda:0" = _foreach_sub_1[64] + getitem_1397: "f32[][]cuda:0" = _foreach_sub_1[65] + getitem_1398: "f32[][]cuda:0" = _foreach_sub_1[66] + getitem_1399: "f32[][]cuda:0" = _foreach_sub_1[67] + getitem_1400: "f32[][]cuda:0" = _foreach_sub_1[68] + getitem_1401: "f32[][]cuda:0" = _foreach_sub_1[69] + getitem_1402: "f32[][]cuda:0" = _foreach_sub_1[70] + getitem_1403: 
"f32[][]cuda:0" = _foreach_sub_1[71] + getitem_1404: "f32[][]cuda:0" = _foreach_sub_1[72] + getitem_1405: "f32[][]cuda:0" = _foreach_sub_1[73] + getitem_1406: "f32[][]cuda:0" = _foreach_sub_1[74] + getitem_1407: "f32[][]cuda:0" = _foreach_sub_1[75] + getitem_1408: "f32[][]cuda:0" = _foreach_sub_1[76] + getitem_1409: "f32[][]cuda:0" = _foreach_sub_1[77] + getitem_1410: "f32[][]cuda:0" = _foreach_sub_1[78] + getitem_1411: "f32[][]cuda:0" = _foreach_sub_1[79] + getitem_1412: "f32[][]cuda:0" = _foreach_sub_1[80] + getitem_1413: "f32[][]cuda:0" = _foreach_sub_1[81] + getitem_1414: "f32[][]cuda:0" = _foreach_sub_1[82] + getitem_1415: "f32[][]cuda:0" = _foreach_sub_1[83] + getitem_1416: "f32[][]cuda:0" = _foreach_sub_1[84] + getitem_1417: "f32[][]cuda:0" = _foreach_sub_1[85] + getitem_1418: "f32[][]cuda:0" = _foreach_sub_1[86] + getitem_1419: "f32[][]cuda:0" = _foreach_sub_1[87] + getitem_1420: "f32[][]cuda:0" = _foreach_sub_1[88] + getitem_1421: "f32[][]cuda:0" = _foreach_sub_1[89] + getitem_1422: "f32[][]cuda:0" = _foreach_sub_1[90] + getitem_1423: "f32[][]cuda:0" = _foreach_sub_1[91] + getitem_1424: "f32[][]cuda:0" = _foreach_sub_1[92] + getitem_1425: "f32[][]cuda:0" = _foreach_sub_1[93] + getitem_1426: "f32[][]cuda:0" = _foreach_sub_1[94] + getitem_1427: "f32[][]cuda:0" = _foreach_sub_1[95] + getitem_1428: "f32[][]cuda:0" = _foreach_sub_1[96] + getitem_1429: "f32[][]cuda:0" = _foreach_sub_1[97] + getitem_1430: "f32[][]cuda:0" = _foreach_sub_1[98] + getitem_1431: "f32[][]cuda:0" = _foreach_sub_1[99] + getitem_1432: "f32[][]cuda:0" = _foreach_sub_1[100] + getitem_1433: "f32[][]cuda:0" = _foreach_sub_1[101] + getitem_1434: "f32[][]cuda:0" = _foreach_sub_1[102] + getitem_1435: "f32[][]cuda:0" = _foreach_sub_1[103] + getitem_1436: "f32[][]cuda:0" = _foreach_sub_1[104] + getitem_1437: "f32[][]cuda:0" = _foreach_sub_1[105] + getitem_1438: "f32[][]cuda:0" = _foreach_sub_1[106] + getitem_1439: "f32[][]cuda:0" = _foreach_sub_1[107] + getitem_1440: "f32[][]cuda:0" = _foreach_sub_1[108] + getitem_1441: "f32[][]cuda:0" = _foreach_sub_1[109] + getitem_1442: "f32[][]cuda:0" = _foreach_sub_1[110] + getitem_1443: "f32[][]cuda:0" = _foreach_sub_1[111] + getitem_1444: "f32[][]cuda:0" = _foreach_sub_1[112] + getitem_1445: "f32[][]cuda:0" = _foreach_sub_1[113] + getitem_1446: "f32[][]cuda:0" = _foreach_sub_1[114] + getitem_1447: "f32[][]cuda:0" = _foreach_sub_1[115] + getitem_1448: "f32[][]cuda:0" = _foreach_sub_1[116] + getitem_1449: "f32[][]cuda:0" = _foreach_sub_1[117] + getitem_1450: "f32[][]cuda:0" = _foreach_sub_1[118] + getitem_1451: "f32[][]cuda:0" = _foreach_sub_1[119] + getitem_1452: "f32[][]cuda:0" = _foreach_sub_1[120] + getitem_1453: "f32[][]cuda:0" = _foreach_sub_1[121] + getitem_1454: "f32[][]cuda:0" = _foreach_sub_1[122] + getitem_1455: "f32[][]cuda:0" = _foreach_sub_1[123] + getitem_1456: "f32[][]cuda:0" = _foreach_sub_1[124] + getitem_1457: "f32[][]cuda:0" = _foreach_sub_1[125] + getitem_1458: "f32[][]cuda:0" = _foreach_sub_1[126] + getitem_1459: "f32[][]cuda:0" = _foreach_sub_1[127] + getitem_1460: "f32[][]cuda:0" = _foreach_sub_1[128] + getitem_1461: "f32[][]cuda:0" = _foreach_sub_1[129] + getitem_1462: "f32[][]cuda:0" = _foreach_sub_1[130] + getitem_1463: "f32[][]cuda:0" = _foreach_sub_1[131] + getitem_1464: "f32[][]cuda:0" = _foreach_sub_1[132] + getitem_1465: "f32[][]cuda:0" = _foreach_sub_1[133] + getitem_1466: "f32[][]cuda:0" = _foreach_sub_1[134] + getitem_1467: "f32[][]cuda:0" = _foreach_sub_1[135] + getitem_1468: "f32[][]cuda:0" = _foreach_sub_1[136] + getitem_1469: "f32[][]cuda:0" = 
_foreach_sub_1[137] + getitem_1470: "f32[][]cuda:0" = _foreach_sub_1[138] + getitem_1471: "f32[][]cuda:0" = _foreach_sub_1[139] + getitem_1472: "f32[][]cuda:0" = _foreach_sub_1[140] + getitem_1473: "f32[][]cuda:0" = _foreach_sub_1[141] + getitem_1474: "f32[][]cuda:0" = _foreach_sub_1[142] + getitem_1475: "f32[][]cuda:0" = _foreach_sub_1[143] + getitem_1476: "f32[][]cuda:0" = _foreach_sub_1[144] + getitem_1477: "f32[][]cuda:0" = _foreach_sub_1[145] + getitem_1478: "f32[][]cuda:0" = _foreach_sub_1[146] + getitem_1479: "f32[][]cuda:0" = _foreach_sub_1[147]; _foreach_sub_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:559 in _multi_tensor_adam, code: torch._foreach_sub_(bias_correction2, 1) + _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = 
getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None + getitem_1480: "f32[][]cuda:0" = _foreach_sub_2[0] + getitem_1481: "f32[][]cuda:0" = _foreach_sub_2[1] + getitem_1482: "f32[][]cuda:0" = _foreach_sub_2[2] + getitem_1483: "f32[][]cuda:0" = _foreach_sub_2[3] + getitem_1484: "f32[][]cuda:0" = _foreach_sub_2[4] + getitem_1485: "f32[][]cuda:0" = _foreach_sub_2[5] + getitem_1486: "f32[][]cuda:0" = _foreach_sub_2[6] + getitem_1487: "f32[][]cuda:0" = _foreach_sub_2[7] + getitem_1488: "f32[][]cuda:0" = _foreach_sub_2[8] + getitem_1489: "f32[][]cuda:0" = _foreach_sub_2[9] + getitem_1490: "f32[][]cuda:0" = _foreach_sub_2[10] + getitem_1491: "f32[][]cuda:0" = _foreach_sub_2[11] + getitem_1492: "f32[][]cuda:0" = _foreach_sub_2[12] + getitem_1493: "f32[][]cuda:0" = _foreach_sub_2[13] + getitem_1494: "f32[][]cuda:0" = _foreach_sub_2[14] + getitem_1495: "f32[][]cuda:0" = _foreach_sub_2[15] + getitem_1496: "f32[][]cuda:0" = _foreach_sub_2[16] + getitem_1497: "f32[][]cuda:0" = _foreach_sub_2[17] + getitem_1498: "f32[][]cuda:0" = _foreach_sub_2[18] + getitem_1499: "f32[][]cuda:0" = _foreach_sub_2[19] + getitem_1500: "f32[][]cuda:0" = _foreach_sub_2[20] + getitem_1501: "f32[][]cuda:0" = _foreach_sub_2[21] + getitem_1502: "f32[][]cuda:0" = _foreach_sub_2[22] + getitem_1503: "f32[][]cuda:0" = _foreach_sub_2[23] + getitem_1504: "f32[][]cuda:0" = _foreach_sub_2[24] + getitem_1505: "f32[][]cuda:0" = _foreach_sub_2[25] + getitem_1506: "f32[][]cuda:0" = _foreach_sub_2[26] + getitem_1507: "f32[][]cuda:0" = _foreach_sub_2[27] + getitem_1508: "f32[][]cuda:0" = _foreach_sub_2[28] + getitem_1509: "f32[][]cuda:0" = _foreach_sub_2[29] + getitem_1510: "f32[][]cuda:0" = _foreach_sub_2[30] + getitem_1511: "f32[][]cuda:0" = _foreach_sub_2[31] + getitem_1512: "f32[][]cuda:0" = _foreach_sub_2[32] + getitem_1513: "f32[][]cuda:0" = _foreach_sub_2[33] + getitem_1514: "f32[][]cuda:0" = _foreach_sub_2[34] + getitem_1515: "f32[][]cuda:0" = _foreach_sub_2[35] + getitem_1516: "f32[][]cuda:0" = _foreach_sub_2[36] + getitem_1517: "f32[][]cuda:0" = _foreach_sub_2[37] + getitem_1518: 
"f32[][]cuda:0" = _foreach_sub_2[38] + getitem_1519: "f32[][]cuda:0" = _foreach_sub_2[39] + getitem_1520: "f32[][]cuda:0" = _foreach_sub_2[40] + getitem_1521: "f32[][]cuda:0" = _foreach_sub_2[41] + getitem_1522: "f32[][]cuda:0" = _foreach_sub_2[42] + getitem_1523: "f32[][]cuda:0" = _foreach_sub_2[43] + getitem_1524: "f32[][]cuda:0" = _foreach_sub_2[44] + getitem_1525: "f32[][]cuda:0" = _foreach_sub_2[45] + getitem_1526: "f32[][]cuda:0" = _foreach_sub_2[46] + getitem_1527: "f32[][]cuda:0" = _foreach_sub_2[47] + getitem_1528: "f32[][]cuda:0" = _foreach_sub_2[48] + getitem_1529: "f32[][]cuda:0" = _foreach_sub_2[49] + getitem_1530: "f32[][]cuda:0" = _foreach_sub_2[50] + getitem_1531: "f32[][]cuda:0" = _foreach_sub_2[51] + getitem_1532: "f32[][]cuda:0" = _foreach_sub_2[52] + getitem_1533: "f32[][]cuda:0" = _foreach_sub_2[53] + getitem_1534: "f32[][]cuda:0" = _foreach_sub_2[54] + getitem_1535: "f32[][]cuda:0" = _foreach_sub_2[55] + getitem_1536: "f32[][]cuda:0" = _foreach_sub_2[56] + getitem_1537: "f32[][]cuda:0" = _foreach_sub_2[57] + getitem_1538: "f32[][]cuda:0" = _foreach_sub_2[58] + getitem_1539: "f32[][]cuda:0" = _foreach_sub_2[59] + getitem_1540: "f32[][]cuda:0" = _foreach_sub_2[60] + getitem_1541: "f32[][]cuda:0" = _foreach_sub_2[61] + getitem_1542: "f32[][]cuda:0" = _foreach_sub_2[62] + getitem_1543: "f32[][]cuda:0" = _foreach_sub_2[63] + getitem_1544: "f32[][]cuda:0" = _foreach_sub_2[64] + getitem_1545: "f32[][]cuda:0" = _foreach_sub_2[65] + getitem_1546: "f32[][]cuda:0" = _foreach_sub_2[66] + getitem_1547: "f32[][]cuda:0" = _foreach_sub_2[67] + getitem_1548: "f32[][]cuda:0" = _foreach_sub_2[68] + getitem_1549: "f32[][]cuda:0" = _foreach_sub_2[69] + getitem_1550: "f32[][]cuda:0" = _foreach_sub_2[70] + getitem_1551: "f32[][]cuda:0" = _foreach_sub_2[71] + getitem_1552: "f32[][]cuda:0" = _foreach_sub_2[72] + getitem_1553: "f32[][]cuda:0" = _foreach_sub_2[73] + getitem_1554: "f32[][]cuda:0" = _foreach_sub_2[74] + getitem_1555: "f32[][]cuda:0" = _foreach_sub_2[75] + getitem_1556: "f32[][]cuda:0" = _foreach_sub_2[76] + getitem_1557: "f32[][]cuda:0" = _foreach_sub_2[77] + getitem_1558: "f32[][]cuda:0" = _foreach_sub_2[78] + getitem_1559: "f32[][]cuda:0" = _foreach_sub_2[79] + getitem_1560: "f32[][]cuda:0" = _foreach_sub_2[80] + getitem_1561: "f32[][]cuda:0" = _foreach_sub_2[81] + getitem_1562: "f32[][]cuda:0" = _foreach_sub_2[82] + getitem_1563: "f32[][]cuda:0" = _foreach_sub_2[83] + getitem_1564: "f32[][]cuda:0" = _foreach_sub_2[84] + getitem_1565: "f32[][]cuda:0" = _foreach_sub_2[85] + getitem_1566: "f32[][]cuda:0" = _foreach_sub_2[86] + getitem_1567: "f32[][]cuda:0" = _foreach_sub_2[87] + getitem_1568: "f32[][]cuda:0" = _foreach_sub_2[88] + getitem_1569: "f32[][]cuda:0" = _foreach_sub_2[89] + getitem_1570: "f32[][]cuda:0" = _foreach_sub_2[90] + getitem_1571: "f32[][]cuda:0" = _foreach_sub_2[91] + getitem_1572: "f32[][]cuda:0" = _foreach_sub_2[92] + getitem_1573: "f32[][]cuda:0" = _foreach_sub_2[93] + getitem_1574: "f32[][]cuda:0" = _foreach_sub_2[94] + getitem_1575: "f32[][]cuda:0" = _foreach_sub_2[95] + getitem_1576: "f32[][]cuda:0" = _foreach_sub_2[96] + getitem_1577: "f32[][]cuda:0" = _foreach_sub_2[97] + getitem_1578: "f32[][]cuda:0" = _foreach_sub_2[98] + getitem_1579: "f32[][]cuda:0" = _foreach_sub_2[99] + getitem_1580: "f32[][]cuda:0" = _foreach_sub_2[100] + getitem_1581: "f32[][]cuda:0" = _foreach_sub_2[101] + getitem_1582: "f32[][]cuda:0" = _foreach_sub_2[102] + getitem_1583: "f32[][]cuda:0" = _foreach_sub_2[103] + getitem_1584: "f32[][]cuda:0" = _foreach_sub_2[104] + 
getitem_1585: "f32[][]cuda:0" = _foreach_sub_2[105] + getitem_1586: "f32[][]cuda:0" = _foreach_sub_2[106] + getitem_1587: "f32[][]cuda:0" = _foreach_sub_2[107] + getitem_1588: "f32[][]cuda:0" = _foreach_sub_2[108] + getitem_1589: "f32[][]cuda:0" = _foreach_sub_2[109] + getitem_1590: "f32[][]cuda:0" = _foreach_sub_2[110] + getitem_1591: "f32[][]cuda:0" = _foreach_sub_2[111] + getitem_1592: "f32[][]cuda:0" = _foreach_sub_2[112] + getitem_1593: "f32[][]cuda:0" = _foreach_sub_2[113] + getitem_1594: "f32[][]cuda:0" = _foreach_sub_2[114] + getitem_1595: "f32[][]cuda:0" = _foreach_sub_2[115] + getitem_1596: "f32[][]cuda:0" = _foreach_sub_2[116] + getitem_1597: "f32[][]cuda:0" = _foreach_sub_2[117] + getitem_1598: "f32[][]cuda:0" = _foreach_sub_2[118] + getitem_1599: "f32[][]cuda:0" = _foreach_sub_2[119] + getitem_1600: "f32[][]cuda:0" = _foreach_sub_2[120] + getitem_1601: "f32[][]cuda:0" = _foreach_sub_2[121] + getitem_1602: "f32[][]cuda:0" = _foreach_sub_2[122] + getitem_1603: "f32[][]cuda:0" = _foreach_sub_2[123] + getitem_1604: "f32[][]cuda:0" = _foreach_sub_2[124] + getitem_1605: "f32[][]cuda:0" = _foreach_sub_2[125] + getitem_1606: "f32[][]cuda:0" = _foreach_sub_2[126] + getitem_1607: "f32[][]cuda:0" = _foreach_sub_2[127] + getitem_1608: "f32[][]cuda:0" = _foreach_sub_2[128] + getitem_1609: "f32[][]cuda:0" = _foreach_sub_2[129] + getitem_1610: "f32[][]cuda:0" = _foreach_sub_2[130] + getitem_1611: "f32[][]cuda:0" = _foreach_sub_2[131] + getitem_1612: "f32[][]cuda:0" = _foreach_sub_2[132] + getitem_1613: "f32[][]cuda:0" = _foreach_sub_2[133] + getitem_1614: "f32[][]cuda:0" = _foreach_sub_2[134] + getitem_1615: "f32[][]cuda:0" = _foreach_sub_2[135] + getitem_1616: "f32[][]cuda:0" = _foreach_sub_2[136] + getitem_1617: "f32[][]cuda:0" = _foreach_sub_2[137] + getitem_1618: "f32[][]cuda:0" = _foreach_sub_2[138] + getitem_1619: "f32[][]cuda:0" = _foreach_sub_2[139] + getitem_1620: "f32[][]cuda:0" = _foreach_sub_2[140] + getitem_1621: "f32[][]cuda:0" = _foreach_sub_2[141] + getitem_1622: "f32[][]cuda:0" = _foreach_sub_2[142] + getitem_1623: "f32[][]cuda:0" = _foreach_sub_2[143] + getitem_1624: "f32[][]cuda:0" = _foreach_sub_2[144] + getitem_1625: "f32[][]cuda:0" = _foreach_sub_2[145] + getitem_1626: "f32[][]cuda:0" = _foreach_sub_2[146] + getitem_1627: "f32[][]cuda:0" = _foreach_sub_2[147]; _foreach_sub_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:561 in _multi_tensor_adam, code: torch._foreach_neg_(bias_correction2) + _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, 
getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None + getitem_1628: "f32[][]cuda:0" = _foreach_neg[0] + getitem_1629: "f32[][]cuda:0" = _foreach_neg[1] + getitem_1630: "f32[][]cuda:0" = _foreach_neg[2] + getitem_1631: "f32[][]cuda:0" = _foreach_neg[3] + getitem_1632: "f32[][]cuda:0" = _foreach_neg[4] + getitem_1633: 
"f32[][]cuda:0" = _foreach_neg[5] + getitem_1634: "f32[][]cuda:0" = _foreach_neg[6] + getitem_1635: "f32[][]cuda:0" = _foreach_neg[7] + getitem_1636: "f32[][]cuda:0" = _foreach_neg[8] + getitem_1637: "f32[][]cuda:0" = _foreach_neg[9] + getitem_1638: "f32[][]cuda:0" = _foreach_neg[10] + getitem_1639: "f32[][]cuda:0" = _foreach_neg[11] + getitem_1640: "f32[][]cuda:0" = _foreach_neg[12] + getitem_1641: "f32[][]cuda:0" = _foreach_neg[13] + getitem_1642: "f32[][]cuda:0" = _foreach_neg[14] + getitem_1643: "f32[][]cuda:0" = _foreach_neg[15] + getitem_1644: "f32[][]cuda:0" = _foreach_neg[16] + getitem_1645: "f32[][]cuda:0" = _foreach_neg[17] + getitem_1646: "f32[][]cuda:0" = _foreach_neg[18] + getitem_1647: "f32[][]cuda:0" = _foreach_neg[19] + getitem_1648: "f32[][]cuda:0" = _foreach_neg[20] + getitem_1649: "f32[][]cuda:0" = _foreach_neg[21] + getitem_1650: "f32[][]cuda:0" = _foreach_neg[22] + getitem_1651: "f32[][]cuda:0" = _foreach_neg[23] + getitem_1652: "f32[][]cuda:0" = _foreach_neg[24] + getitem_1653: "f32[][]cuda:0" = _foreach_neg[25] + getitem_1654: "f32[][]cuda:0" = _foreach_neg[26] + getitem_1655: "f32[][]cuda:0" = _foreach_neg[27] + getitem_1656: "f32[][]cuda:0" = _foreach_neg[28] + getitem_1657: "f32[][]cuda:0" = _foreach_neg[29] + getitem_1658: "f32[][]cuda:0" = _foreach_neg[30] + getitem_1659: "f32[][]cuda:0" = _foreach_neg[31] + getitem_1660: "f32[][]cuda:0" = _foreach_neg[32] + getitem_1661: "f32[][]cuda:0" = _foreach_neg[33] + getitem_1662: "f32[][]cuda:0" = _foreach_neg[34] + getitem_1663: "f32[][]cuda:0" = _foreach_neg[35] + getitem_1664: "f32[][]cuda:0" = _foreach_neg[36] + getitem_1665: "f32[][]cuda:0" = _foreach_neg[37] + getitem_1666: "f32[][]cuda:0" = _foreach_neg[38] + getitem_1667: "f32[][]cuda:0" = _foreach_neg[39] + getitem_1668: "f32[][]cuda:0" = _foreach_neg[40] + getitem_1669: "f32[][]cuda:0" = _foreach_neg[41] + getitem_1670: "f32[][]cuda:0" = _foreach_neg[42] + getitem_1671: "f32[][]cuda:0" = _foreach_neg[43] + getitem_1672: "f32[][]cuda:0" = _foreach_neg[44] + getitem_1673: "f32[][]cuda:0" = _foreach_neg[45] + getitem_1674: "f32[][]cuda:0" = _foreach_neg[46] + getitem_1675: "f32[][]cuda:0" = _foreach_neg[47] + getitem_1676: "f32[][]cuda:0" = _foreach_neg[48] + getitem_1677: "f32[][]cuda:0" = _foreach_neg[49] + getitem_1678: "f32[][]cuda:0" = _foreach_neg[50] + getitem_1679: "f32[][]cuda:0" = _foreach_neg[51] + getitem_1680: "f32[][]cuda:0" = _foreach_neg[52] + getitem_1681: "f32[][]cuda:0" = _foreach_neg[53] + getitem_1682: "f32[][]cuda:0" = _foreach_neg[54] + getitem_1683: "f32[][]cuda:0" = _foreach_neg[55] + getitem_1684: "f32[][]cuda:0" = _foreach_neg[56] + getitem_1685: "f32[][]cuda:0" = _foreach_neg[57] + getitem_1686: "f32[][]cuda:0" = _foreach_neg[58] + getitem_1687: "f32[][]cuda:0" = _foreach_neg[59] + getitem_1688: "f32[][]cuda:0" = _foreach_neg[60] + getitem_1689: "f32[][]cuda:0" = _foreach_neg[61] + getitem_1690: "f32[][]cuda:0" = _foreach_neg[62] + getitem_1691: "f32[][]cuda:0" = _foreach_neg[63] + getitem_1692: "f32[][]cuda:0" = _foreach_neg[64] + getitem_1693: "f32[][]cuda:0" = _foreach_neg[65] + getitem_1694: "f32[][]cuda:0" = _foreach_neg[66] + getitem_1695: "f32[][]cuda:0" = _foreach_neg[67] + getitem_1696: "f32[][]cuda:0" = _foreach_neg[68] + getitem_1697: "f32[][]cuda:0" = _foreach_neg[69] + getitem_1698: "f32[][]cuda:0" = _foreach_neg[70] + getitem_1699: "f32[][]cuda:0" = _foreach_neg[71] + getitem_1700: "f32[][]cuda:0" = _foreach_neg[72] + getitem_1701: "f32[][]cuda:0" = _foreach_neg[73] + getitem_1702: "f32[][]cuda:0" = _foreach_neg[74] + 
getitem_1703: "f32[][]cuda:0" = _foreach_neg[75] + getitem_1704: "f32[][]cuda:0" = _foreach_neg[76] + getitem_1705: "f32[][]cuda:0" = _foreach_neg[77] + getitem_1706: "f32[][]cuda:0" = _foreach_neg[78] + getitem_1707: "f32[][]cuda:0" = _foreach_neg[79] + getitem_1708: "f32[][]cuda:0" = _foreach_neg[80] + getitem_1709: "f32[][]cuda:0" = _foreach_neg[81] + getitem_1710: "f32[][]cuda:0" = _foreach_neg[82] + getitem_1711: "f32[][]cuda:0" = _foreach_neg[83] + getitem_1712: "f32[][]cuda:0" = _foreach_neg[84] + getitem_1713: "f32[][]cuda:0" = _foreach_neg[85] + getitem_1714: "f32[][]cuda:0" = _foreach_neg[86] + getitem_1715: "f32[][]cuda:0" = _foreach_neg[87] + getitem_1716: "f32[][]cuda:0" = _foreach_neg[88] + getitem_1717: "f32[][]cuda:0" = _foreach_neg[89] + getitem_1718: "f32[][]cuda:0" = _foreach_neg[90] + getitem_1719: "f32[][]cuda:0" = _foreach_neg[91] + getitem_1720: "f32[][]cuda:0" = _foreach_neg[92] + getitem_1721: "f32[][]cuda:0" = _foreach_neg[93] + getitem_1722: "f32[][]cuda:0" = _foreach_neg[94] + getitem_1723: "f32[][]cuda:0" = _foreach_neg[95] + getitem_1724: "f32[][]cuda:0" = _foreach_neg[96] + getitem_1725: "f32[][]cuda:0" = _foreach_neg[97] + getitem_1726: "f32[][]cuda:0" = _foreach_neg[98] + getitem_1727: "f32[][]cuda:0" = _foreach_neg[99] + getitem_1728: "f32[][]cuda:0" = _foreach_neg[100] + getitem_1729: "f32[][]cuda:0" = _foreach_neg[101] + getitem_1730: "f32[][]cuda:0" = _foreach_neg[102] + getitem_1731: "f32[][]cuda:0" = _foreach_neg[103] + getitem_1732: "f32[][]cuda:0" = _foreach_neg[104] + getitem_1733: "f32[][]cuda:0" = _foreach_neg[105] + getitem_1734: "f32[][]cuda:0" = _foreach_neg[106] + getitem_1735: "f32[][]cuda:0" = _foreach_neg[107] + getitem_1736: "f32[][]cuda:0" = _foreach_neg[108] + getitem_1737: "f32[][]cuda:0" = _foreach_neg[109] + getitem_1738: "f32[][]cuda:0" = _foreach_neg[110] + getitem_1739: "f32[][]cuda:0" = _foreach_neg[111] + getitem_1740: "f32[][]cuda:0" = _foreach_neg[112] + getitem_1741: "f32[][]cuda:0" = _foreach_neg[113] + getitem_1742: "f32[][]cuda:0" = _foreach_neg[114] + getitem_1743: "f32[][]cuda:0" = _foreach_neg[115] + getitem_1744: "f32[][]cuda:0" = _foreach_neg[116] + getitem_1745: "f32[][]cuda:0" = _foreach_neg[117] + getitem_1746: "f32[][]cuda:0" = _foreach_neg[118] + getitem_1747: "f32[][]cuda:0" = _foreach_neg[119] + getitem_1748: "f32[][]cuda:0" = _foreach_neg[120] + getitem_1749: "f32[][]cuda:0" = _foreach_neg[121] + getitem_1750: "f32[][]cuda:0" = _foreach_neg[122] + getitem_1751: "f32[][]cuda:0" = _foreach_neg[123] + getitem_1752: "f32[][]cuda:0" = _foreach_neg[124] + getitem_1753: "f32[][]cuda:0" = _foreach_neg[125] + getitem_1754: "f32[][]cuda:0" = _foreach_neg[126] + getitem_1755: "f32[][]cuda:0" = _foreach_neg[127] + getitem_1756: "f32[][]cuda:0" = _foreach_neg[128] + getitem_1757: "f32[][]cuda:0" = _foreach_neg[129] + getitem_1758: "f32[][]cuda:0" = _foreach_neg[130] + getitem_1759: "f32[][]cuda:0" = _foreach_neg[131] + getitem_1760: "f32[][]cuda:0" = _foreach_neg[132] + getitem_1761: "f32[][]cuda:0" = _foreach_neg[133] + getitem_1762: "f32[][]cuda:0" = _foreach_neg[134] + getitem_1763: "f32[][]cuda:0" = _foreach_neg[135] + getitem_1764: "f32[][]cuda:0" = _foreach_neg[136] + getitem_1765: "f32[][]cuda:0" = _foreach_neg[137] + getitem_1766: "f32[][]cuda:0" = _foreach_neg[138] + getitem_1767: "f32[][]cuda:0" = _foreach_neg[139] + getitem_1768: "f32[][]cuda:0" = _foreach_neg[140] + getitem_1769: "f32[][]cuda:0" = _foreach_neg[141] + getitem_1770: "f32[][]cuda:0" = _foreach_neg[142] + getitem_1771: "f32[][]cuda:0" = 
_foreach_neg[143] + getitem_1772: "f32[][]cuda:0" = _foreach_neg[144] + getitem_1773: "f32[][]cuda:0" = _foreach_neg[145] + getitem_1774: "f32[][]cuda:0" = _foreach_neg[146] + getitem_1775: "f32[][]cuda:0" = _foreach_neg[147]; _foreach_neg = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:564 in _multi_tensor_adam, code: torch._foreach_div_(bias_correction1, lr) + _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = 
getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None + getitem_1776: "f32[][]cuda:0" = _foreach_div[0] + getitem_1777: "f32[][]cuda:0" = _foreach_div[1] + getitem_1778: "f32[][]cuda:0" = _foreach_div[2] + getitem_1779: "f32[][]cuda:0" = _foreach_div[3] + getitem_1780: "f32[][]cuda:0" = _foreach_div[4] + getitem_1781: "f32[][]cuda:0" = _foreach_div[5] + getitem_1782: "f32[][]cuda:0" = _foreach_div[6] + getitem_1783: "f32[][]cuda:0" = _foreach_div[7] + getitem_1784: "f32[][]cuda:0" = _foreach_div[8] + getitem_1785: "f32[][]cuda:0" = _foreach_div[9] + getitem_1786: "f32[][]cuda:0" = _foreach_div[10] + getitem_1787: "f32[][]cuda:0" = _foreach_div[11] + getitem_1788: "f32[][]cuda:0" = _foreach_div[12] + getitem_1789: "f32[][]cuda:0" = _foreach_div[13] + getitem_1790: "f32[][]cuda:0" = _foreach_div[14] + getitem_1791: "f32[][]cuda:0" = _foreach_div[15] + getitem_1792: "f32[][]cuda:0" = _foreach_div[16] + getitem_1793: "f32[][]cuda:0" = _foreach_div[17] + getitem_1794: "f32[][]cuda:0" = _foreach_div[18] + getitem_1795: "f32[][]cuda:0" = _foreach_div[19] + getitem_1796: "f32[][]cuda:0" = _foreach_div[20] + getitem_1797: "f32[][]cuda:0" = _foreach_div[21] + getitem_1798: "f32[][]cuda:0" = _foreach_div[22] + getitem_1799: "f32[][]cuda:0" = _foreach_div[23] + getitem_1800: "f32[][]cuda:0" = _foreach_div[24] + getitem_1801: "f32[][]cuda:0" = _foreach_div[25] + getitem_1802: "f32[][]cuda:0" = _foreach_div[26] + getitem_1803: "f32[][]cuda:0" = _foreach_div[27] + getitem_1804: "f32[][]cuda:0" = _foreach_div[28] + getitem_1805: "f32[][]cuda:0" = _foreach_div[29] + getitem_1806: "f32[][]cuda:0" = _foreach_div[30] + getitem_1807: "f32[][]cuda:0" = _foreach_div[31] + getitem_1808: "f32[][]cuda:0" = _foreach_div[32] + getitem_1809: "f32[][]cuda:0" = _foreach_div[33] + getitem_1810: "f32[][]cuda:0" = _foreach_div[34] + getitem_1811: "f32[][]cuda:0" = _foreach_div[35] + getitem_1812: "f32[][]cuda:0" = _foreach_div[36] + getitem_1813: "f32[][]cuda:0" = _foreach_div[37] + getitem_1814: "f32[][]cuda:0" = _foreach_div[38] + getitem_1815: "f32[][]cuda:0" = _foreach_div[39] + getitem_1816: "f32[][]cuda:0" = _foreach_div[40] + getitem_1817: "f32[][]cuda:0" = _foreach_div[41] + getitem_1818: "f32[][]cuda:0" = _foreach_div[42] + getitem_1819: "f32[][]cuda:0" = _foreach_div[43] + getitem_1820: "f32[][]cuda:0" = _foreach_div[44] + getitem_1821: "f32[][]cuda:0" = _foreach_div[45] + getitem_1822: 
"f32[][]cuda:0" = _foreach_div[46] + getitem_1823: "f32[][]cuda:0" = _foreach_div[47] + getitem_1824: "f32[][]cuda:0" = _foreach_div[48] + getitem_1825: "f32[][]cuda:0" = _foreach_div[49] + getitem_1826: "f32[][]cuda:0" = _foreach_div[50] + getitem_1827: "f32[][]cuda:0" = _foreach_div[51] + getitem_1828: "f32[][]cuda:0" = _foreach_div[52] + getitem_1829: "f32[][]cuda:0" = _foreach_div[53] + getitem_1830: "f32[][]cuda:0" = _foreach_div[54] + getitem_1831: "f32[][]cuda:0" = _foreach_div[55] + getitem_1832: "f32[][]cuda:0" = _foreach_div[56] + getitem_1833: "f32[][]cuda:0" = _foreach_div[57] + getitem_1834: "f32[][]cuda:0" = _foreach_div[58] + getitem_1835: "f32[][]cuda:0" = _foreach_div[59] + getitem_1836: "f32[][]cuda:0" = _foreach_div[60] + getitem_1837: "f32[][]cuda:0" = _foreach_div[61] + getitem_1838: "f32[][]cuda:0" = _foreach_div[62] + getitem_1839: "f32[][]cuda:0" = _foreach_div[63] + getitem_1840: "f32[][]cuda:0" = _foreach_div[64] + getitem_1841: "f32[][]cuda:0" = _foreach_div[65] + getitem_1842: "f32[][]cuda:0" = _foreach_div[66] + getitem_1843: "f32[][]cuda:0" = _foreach_div[67] + getitem_1844: "f32[][]cuda:0" = _foreach_div[68] + getitem_1845: "f32[][]cuda:0" = _foreach_div[69] + getitem_1846: "f32[][]cuda:0" = _foreach_div[70] + getitem_1847: "f32[][]cuda:0" = _foreach_div[71] + getitem_1848: "f32[][]cuda:0" = _foreach_div[72] + getitem_1849: "f32[][]cuda:0" = _foreach_div[73] + getitem_1850: "f32[][]cuda:0" = _foreach_div[74] + getitem_1851: "f32[][]cuda:0" = _foreach_div[75] + getitem_1852: "f32[][]cuda:0" = _foreach_div[76] + getitem_1853: "f32[][]cuda:0" = _foreach_div[77] + getitem_1854: "f32[][]cuda:0" = _foreach_div[78] + getitem_1855: "f32[][]cuda:0" = _foreach_div[79] + getitem_1856: "f32[][]cuda:0" = _foreach_div[80] + getitem_1857: "f32[][]cuda:0" = _foreach_div[81] + getitem_1858: "f32[][]cuda:0" = _foreach_div[82] + getitem_1859: "f32[][]cuda:0" = _foreach_div[83] + getitem_1860: "f32[][]cuda:0" = _foreach_div[84] + getitem_1861: "f32[][]cuda:0" = _foreach_div[85] + getitem_1862: "f32[][]cuda:0" = _foreach_div[86] + getitem_1863: "f32[][]cuda:0" = _foreach_div[87] + getitem_1864: "f32[][]cuda:0" = _foreach_div[88] + getitem_1865: "f32[][]cuda:0" = _foreach_div[89] + getitem_1866: "f32[][]cuda:0" = _foreach_div[90] + getitem_1867: "f32[][]cuda:0" = _foreach_div[91] + getitem_1868: "f32[][]cuda:0" = _foreach_div[92] + getitem_1869: "f32[][]cuda:0" = _foreach_div[93] + getitem_1870: "f32[][]cuda:0" = _foreach_div[94] + getitem_1871: "f32[][]cuda:0" = _foreach_div[95] + getitem_1872: "f32[][]cuda:0" = _foreach_div[96] + getitem_1873: "f32[][]cuda:0" = _foreach_div[97] + getitem_1874: "f32[][]cuda:0" = _foreach_div[98] + getitem_1875: "f32[][]cuda:0" = _foreach_div[99] + getitem_1876: "f32[][]cuda:0" = _foreach_div[100] + getitem_1877: "f32[][]cuda:0" = _foreach_div[101] + getitem_1878: "f32[][]cuda:0" = _foreach_div[102] + getitem_1879: "f32[][]cuda:0" = _foreach_div[103] + getitem_1880: "f32[][]cuda:0" = _foreach_div[104] + getitem_1881: "f32[][]cuda:0" = _foreach_div[105] + getitem_1882: "f32[][]cuda:0" = _foreach_div[106] + getitem_1883: "f32[][]cuda:0" = _foreach_div[107] + getitem_1884: "f32[][]cuda:0" = _foreach_div[108] + getitem_1885: "f32[][]cuda:0" = _foreach_div[109] + getitem_1886: "f32[][]cuda:0" = _foreach_div[110] + getitem_1887: "f32[][]cuda:0" = _foreach_div[111] + getitem_1888: "f32[][]cuda:0" = _foreach_div[112] + getitem_1889: "f32[][]cuda:0" = _foreach_div[113] + getitem_1890: "f32[][]cuda:0" = _foreach_div[114] + getitem_1891: "f32[][]cuda:0" = 
_foreach_div[115] + getitem_1892: "f32[][]cuda:0" = _foreach_div[116] + getitem_1893: "f32[][]cuda:0" = _foreach_div[117] + getitem_1894: "f32[][]cuda:0" = _foreach_div[118] + getitem_1895: "f32[][]cuda:0" = _foreach_div[119] + getitem_1896: "f32[][]cuda:0" = _foreach_div[120] + getitem_1897: "f32[][]cuda:0" = _foreach_div[121] + getitem_1898: "f32[][]cuda:0" = _foreach_div[122] + getitem_1899: "f32[][]cuda:0" = _foreach_div[123] + getitem_1900: "f32[][]cuda:0" = _foreach_div[124] + getitem_1901: "f32[][]cuda:0" = _foreach_div[125] + getitem_1902: "f32[][]cuda:0" = _foreach_div[126] + getitem_1903: "f32[][]cuda:0" = _foreach_div[127] + getitem_1904: "f32[][]cuda:0" = _foreach_div[128] + getitem_1905: "f32[][]cuda:0" = _foreach_div[129] + getitem_1906: "f32[][]cuda:0" = _foreach_div[130] + getitem_1907: "f32[][]cuda:0" = _foreach_div[131] + getitem_1908: "f32[][]cuda:0" = _foreach_div[132] + getitem_1909: "f32[][]cuda:0" = _foreach_div[133] + getitem_1910: "f32[][]cuda:0" = _foreach_div[134] + getitem_1911: "f32[][]cuda:0" = _foreach_div[135] + getitem_1912: "f32[][]cuda:0" = _foreach_div[136] + getitem_1913: "f32[][]cuda:0" = _foreach_div[137] + getitem_1914: "f32[][]cuda:0" = _foreach_div[138] + getitem_1915: "f32[][]cuda:0" = _foreach_div[139] + getitem_1916: "f32[][]cuda:0" = _foreach_div[140] + getitem_1917: "f32[][]cuda:0" = _foreach_div[141] + getitem_1918: "f32[][]cuda:0" = _foreach_div[142] + getitem_1919: "f32[][]cuda:0" = _foreach_div[143] + getitem_1920: "f32[][]cuda:0" = _foreach_div[144] + getitem_1921: "f32[][]cuda:0" = _foreach_div[145] + getitem_1922: "f32[][]cuda:0" = _foreach_div[146] + getitem_1923: "f32[][]cuda:0" = _foreach_div[147]; _foreach_div = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:565 in _multi_tensor_adam, code: torch._foreach_reciprocal_(bias_correction1) + _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, 
getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None + getitem_1924: "f32[][]cuda:0" = _foreach_reciprocal[0] + getitem_1925: "f32[][]cuda:0" = _foreach_reciprocal[1] + getitem_1926: "f32[][]cuda:0" = _foreach_reciprocal[2] + getitem_1927: "f32[][]cuda:0" = _foreach_reciprocal[3] + getitem_1928: "f32[][]cuda:0" = _foreach_reciprocal[4] + getitem_1929: "f32[][]cuda:0" = _foreach_reciprocal[5] + getitem_1930: "f32[][]cuda:0" = _foreach_reciprocal[6] + getitem_1931: "f32[][]cuda:0" = _foreach_reciprocal[7] + getitem_1932: "f32[][]cuda:0" = _foreach_reciprocal[8] + getitem_1933: "f32[][]cuda:0" = _foreach_reciprocal[9] + getitem_1934: "f32[][]cuda:0" = _foreach_reciprocal[10] + getitem_1935: "f32[][]cuda:0" = _foreach_reciprocal[11] + getitem_1936: "f32[][]cuda:0" = _foreach_reciprocal[12] + getitem_1937: "f32[][]cuda:0" = _foreach_reciprocal[13] + getitem_1938: "f32[][]cuda:0" = _foreach_reciprocal[14] + getitem_1939: "f32[][]cuda:0" = 
_foreach_reciprocal[15] + getitem_1940: "f32[][]cuda:0" = _foreach_reciprocal[16] + getitem_1941: "f32[][]cuda:0" = _foreach_reciprocal[17] + getitem_1942: "f32[][]cuda:0" = _foreach_reciprocal[18] + getitem_1943: "f32[][]cuda:0" = _foreach_reciprocal[19] + getitem_1944: "f32[][]cuda:0" = _foreach_reciprocal[20] + getitem_1945: "f32[][]cuda:0" = _foreach_reciprocal[21] + getitem_1946: "f32[][]cuda:0" = _foreach_reciprocal[22] + getitem_1947: "f32[][]cuda:0" = _foreach_reciprocal[23] + getitem_1948: "f32[][]cuda:0" = _foreach_reciprocal[24] + getitem_1949: "f32[][]cuda:0" = _foreach_reciprocal[25] + getitem_1950: "f32[][]cuda:0" = _foreach_reciprocal[26] + getitem_1951: "f32[][]cuda:0" = _foreach_reciprocal[27] + getitem_1952: "f32[][]cuda:0" = _foreach_reciprocal[28] + getitem_1953: "f32[][]cuda:0" = _foreach_reciprocal[29] + getitem_1954: "f32[][]cuda:0" = _foreach_reciprocal[30] + getitem_1955: "f32[][]cuda:0" = _foreach_reciprocal[31] + getitem_1956: "f32[][]cuda:0" = _foreach_reciprocal[32] + getitem_1957: "f32[][]cuda:0" = _foreach_reciprocal[33] + getitem_1958: "f32[][]cuda:0" = _foreach_reciprocal[34] + getitem_1959: "f32[][]cuda:0" = _foreach_reciprocal[35] + getitem_1960: "f32[][]cuda:0" = _foreach_reciprocal[36] + getitem_1961: "f32[][]cuda:0" = _foreach_reciprocal[37] + getitem_1962: "f32[][]cuda:0" = _foreach_reciprocal[38] + getitem_1963: "f32[][]cuda:0" = _foreach_reciprocal[39] + getitem_1964: "f32[][]cuda:0" = _foreach_reciprocal[40] + getitem_1965: "f32[][]cuda:0" = _foreach_reciprocal[41] + getitem_1966: "f32[][]cuda:0" = _foreach_reciprocal[42] + getitem_1967: "f32[][]cuda:0" = _foreach_reciprocal[43] + getitem_1968: "f32[][]cuda:0" = _foreach_reciprocal[44] + getitem_1969: "f32[][]cuda:0" = _foreach_reciprocal[45] + getitem_1970: "f32[][]cuda:0" = _foreach_reciprocal[46] + getitem_1971: "f32[][]cuda:0" = _foreach_reciprocal[47] + getitem_1972: "f32[][]cuda:0" = _foreach_reciprocal[48] + getitem_1973: "f32[][]cuda:0" = _foreach_reciprocal[49] + getitem_1974: "f32[][]cuda:0" = _foreach_reciprocal[50] + getitem_1975: "f32[][]cuda:0" = _foreach_reciprocal[51] + getitem_1976: "f32[][]cuda:0" = _foreach_reciprocal[52] + getitem_1977: "f32[][]cuda:0" = _foreach_reciprocal[53] + getitem_1978: "f32[][]cuda:0" = _foreach_reciprocal[54] + getitem_1979: "f32[][]cuda:0" = _foreach_reciprocal[55] + getitem_1980: "f32[][]cuda:0" = _foreach_reciprocal[56] + getitem_1981: "f32[][]cuda:0" = _foreach_reciprocal[57] + getitem_1982: "f32[][]cuda:0" = _foreach_reciprocal[58] + getitem_1983: "f32[][]cuda:0" = _foreach_reciprocal[59] + getitem_1984: "f32[][]cuda:0" = _foreach_reciprocal[60] + getitem_1985: "f32[][]cuda:0" = _foreach_reciprocal[61] + getitem_1986: "f32[][]cuda:0" = _foreach_reciprocal[62] + getitem_1987: "f32[][]cuda:0" = _foreach_reciprocal[63] + getitem_1988: "f32[][]cuda:0" = _foreach_reciprocal[64] + getitem_1989: "f32[][]cuda:0" = _foreach_reciprocal[65] + getitem_1990: "f32[][]cuda:0" = _foreach_reciprocal[66] + getitem_1991: "f32[][]cuda:0" = _foreach_reciprocal[67] + getitem_1992: "f32[][]cuda:0" = _foreach_reciprocal[68] + getitem_1993: "f32[][]cuda:0" = _foreach_reciprocal[69] + getitem_1994: "f32[][]cuda:0" = _foreach_reciprocal[70] + getitem_1995: "f32[][]cuda:0" = _foreach_reciprocal[71] + getitem_1996: "f32[][]cuda:0" = _foreach_reciprocal[72] + getitem_1997: "f32[][]cuda:0" = _foreach_reciprocal[73] + getitem_1998: "f32[][]cuda:0" = _foreach_reciprocal[74] + getitem_1999: "f32[][]cuda:0" = _foreach_reciprocal[75] + getitem_2000: "f32[][]cuda:0" = 
_foreach_reciprocal[76] + getitem_2001: "f32[][]cuda:0" = _foreach_reciprocal[77] + getitem_2002: "f32[][]cuda:0" = _foreach_reciprocal[78] + getitem_2003: "f32[][]cuda:0" = _foreach_reciprocal[79] + getitem_2004: "f32[][]cuda:0" = _foreach_reciprocal[80] + getitem_2005: "f32[][]cuda:0" = _foreach_reciprocal[81] + getitem_2006: "f32[][]cuda:0" = _foreach_reciprocal[82] + getitem_2007: "f32[][]cuda:0" = _foreach_reciprocal[83] + getitem_2008: "f32[][]cuda:0" = _foreach_reciprocal[84] + getitem_2009: "f32[][]cuda:0" = _foreach_reciprocal[85] + getitem_2010: "f32[][]cuda:0" = _foreach_reciprocal[86] + getitem_2011: "f32[][]cuda:0" = _foreach_reciprocal[87] + getitem_2012: "f32[][]cuda:0" = _foreach_reciprocal[88] + getitem_2013: "f32[][]cuda:0" = _foreach_reciprocal[89] + getitem_2014: "f32[][]cuda:0" = _foreach_reciprocal[90] + getitem_2015: "f32[][]cuda:0" = _foreach_reciprocal[91] + getitem_2016: "f32[][]cuda:0" = _foreach_reciprocal[92] + getitem_2017: "f32[][]cuda:0" = _foreach_reciprocal[93] + getitem_2018: "f32[][]cuda:0" = _foreach_reciprocal[94] + getitem_2019: "f32[][]cuda:0" = _foreach_reciprocal[95] + getitem_2020: "f32[][]cuda:0" = _foreach_reciprocal[96] + getitem_2021: "f32[][]cuda:0" = _foreach_reciprocal[97] + getitem_2022: "f32[][]cuda:0" = _foreach_reciprocal[98] + getitem_2023: "f32[][]cuda:0" = _foreach_reciprocal[99] + getitem_2024: "f32[][]cuda:0" = _foreach_reciprocal[100] + getitem_2025: "f32[][]cuda:0" = _foreach_reciprocal[101] + getitem_2026: "f32[][]cuda:0" = _foreach_reciprocal[102] + getitem_2027: "f32[][]cuda:0" = _foreach_reciprocal[103] + getitem_2028: "f32[][]cuda:0" = _foreach_reciprocal[104] + getitem_2029: "f32[][]cuda:0" = _foreach_reciprocal[105] + getitem_2030: "f32[][]cuda:0" = _foreach_reciprocal[106] + getitem_2031: "f32[][]cuda:0" = _foreach_reciprocal[107] + getitem_2032: "f32[][]cuda:0" = _foreach_reciprocal[108] + getitem_2033: "f32[][]cuda:0" = _foreach_reciprocal[109] + getitem_2034: "f32[][]cuda:0" = _foreach_reciprocal[110] + getitem_2035: "f32[][]cuda:0" = _foreach_reciprocal[111] + getitem_2036: "f32[][]cuda:0" = _foreach_reciprocal[112] + getitem_2037: "f32[][]cuda:0" = _foreach_reciprocal[113] + getitem_2038: "f32[][]cuda:0" = _foreach_reciprocal[114] + getitem_2039: "f32[][]cuda:0" = _foreach_reciprocal[115] + getitem_2040: "f32[][]cuda:0" = _foreach_reciprocal[116] + getitem_2041: "f32[][]cuda:0" = _foreach_reciprocal[117] + getitem_2042: "f32[][]cuda:0" = _foreach_reciprocal[118] + getitem_2043: "f32[][]cuda:0" = _foreach_reciprocal[119] + getitem_2044: "f32[][]cuda:0" = _foreach_reciprocal[120] + getitem_2045: "f32[][]cuda:0" = _foreach_reciprocal[121] + getitem_2046: "f32[][]cuda:0" = _foreach_reciprocal[122] + getitem_2047: "f32[][]cuda:0" = _foreach_reciprocal[123] + getitem_2048: "f32[][]cuda:0" = _foreach_reciprocal[124] + getitem_2049: "f32[][]cuda:0" = _foreach_reciprocal[125] + getitem_2050: "f32[][]cuda:0" = _foreach_reciprocal[126] + getitem_2051: "f32[][]cuda:0" = _foreach_reciprocal[127] + getitem_2052: "f32[][]cuda:0" = _foreach_reciprocal[128] + getitem_2053: "f32[][]cuda:0" = _foreach_reciprocal[129] + getitem_2054: "f32[][]cuda:0" = _foreach_reciprocal[130] + getitem_2055: "f32[][]cuda:0" = _foreach_reciprocal[131] + getitem_2056: "f32[][]cuda:0" = _foreach_reciprocal[132] + getitem_2057: "f32[][]cuda:0" = _foreach_reciprocal[133] + getitem_2058: "f32[][]cuda:0" = _foreach_reciprocal[134] + getitem_2059: "f32[][]cuda:0" = _foreach_reciprocal[135] + getitem_2060: "f32[][]cuda:0" = _foreach_reciprocal[136] + 
getitem_2061: "f32[][]cuda:0" = _foreach_reciprocal[137] + getitem_2062: "f32[][]cuda:0" = _foreach_reciprocal[138] + getitem_2063: "f32[][]cuda:0" = _foreach_reciprocal[139] + getitem_2064: "f32[][]cuda:0" = _foreach_reciprocal[140] + getitem_2065: "f32[][]cuda:0" = _foreach_reciprocal[141] + getitem_2066: "f32[][]cuda:0" = _foreach_reciprocal[142] + getitem_2067: "f32[][]cuda:0" = _foreach_reciprocal[143] + getitem_2068: "f32[][]cuda:0" = _foreach_reciprocal[144] + getitem_2069: "f32[][]cuda:0" = _foreach_reciprocal[145] + getitem_2070: "f32[][]cuda:0" = _foreach_reciprocal[146] + getitem_2071: "f32[][]cuda:0" = _foreach_reciprocal[147]; _foreach_reciprocal = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:567 in _multi_tensor_adam, code: torch._foreach_sqrt_(bias_correction2) + _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = 
getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None + getitem_2072: "f32[][]cuda:0" = _foreach_sqrt[0] + getitem_2073: "f32[][]cuda:0" = _foreach_sqrt[1] + getitem_2074: "f32[][]cuda:0" = _foreach_sqrt[2] + getitem_2075: "f32[][]cuda:0" = _foreach_sqrt[3] + getitem_2076: "f32[][]cuda:0" = _foreach_sqrt[4] + getitem_2077: "f32[][]cuda:0" = _foreach_sqrt[5] + getitem_2078: "f32[][]cuda:0" = _foreach_sqrt[6] + getitem_2079: "f32[][]cuda:0" = _foreach_sqrt[7] + getitem_2080: "f32[][]cuda:0" = _foreach_sqrt[8] + getitem_2081: "f32[][]cuda:0" = _foreach_sqrt[9] + getitem_2082: "f32[][]cuda:0" = _foreach_sqrt[10] + getitem_2083: "f32[][]cuda:0" = _foreach_sqrt[11] + getitem_2084: "f32[][]cuda:0" = _foreach_sqrt[12] + getitem_2085: "f32[][]cuda:0" = _foreach_sqrt[13] + getitem_2086: "f32[][]cuda:0" = _foreach_sqrt[14] + getitem_2087: "f32[][]cuda:0" = _foreach_sqrt[15] + getitem_2088: "f32[][]cuda:0" = _foreach_sqrt[16] + getitem_2089: "f32[][]cuda:0" = _foreach_sqrt[17] + getitem_2090: "f32[][]cuda:0" = _foreach_sqrt[18] + getitem_2091: "f32[][]cuda:0" = _foreach_sqrt[19] + getitem_2092: "f32[][]cuda:0" = _foreach_sqrt[20] + getitem_2093: "f32[][]cuda:0" = _foreach_sqrt[21] + getitem_2094: "f32[][]cuda:0" = _foreach_sqrt[22] + getitem_2095: "f32[][]cuda:0" = _foreach_sqrt[23] + getitem_2096: "f32[][]cuda:0" = _foreach_sqrt[24] + getitem_2097: "f32[][]cuda:0" = _foreach_sqrt[25] + getitem_2098: "f32[][]cuda:0" = _foreach_sqrt[26] + getitem_2099: "f32[][]cuda:0" = _foreach_sqrt[27] + getitem_2100: "f32[][]cuda:0" = _foreach_sqrt[28] + getitem_2101: "f32[][]cuda:0" = _foreach_sqrt[29] + getitem_2102: "f32[][]cuda:0" = _foreach_sqrt[30] + getitem_2103: "f32[][]cuda:0" = _foreach_sqrt[31] + getitem_2104: "f32[][]cuda:0" = _foreach_sqrt[32] + getitem_2105: "f32[][]cuda:0" = _foreach_sqrt[33] + getitem_2106: "f32[][]cuda:0" = _foreach_sqrt[34] + getitem_2107: "f32[][]cuda:0" = _foreach_sqrt[35] + getitem_2108: "f32[][]cuda:0" = _foreach_sqrt[36] + getitem_2109: 
"f32[][]cuda:0" = _foreach_sqrt[37] + getitem_2110: "f32[][]cuda:0" = _foreach_sqrt[38] + getitem_2111: "f32[][]cuda:0" = _foreach_sqrt[39] + getitem_2112: "f32[][]cuda:0" = _foreach_sqrt[40] + getitem_2113: "f32[][]cuda:0" = _foreach_sqrt[41] + getitem_2114: "f32[][]cuda:0" = _foreach_sqrt[42] + getitem_2115: "f32[][]cuda:0" = _foreach_sqrt[43] + getitem_2116: "f32[][]cuda:0" = _foreach_sqrt[44] + getitem_2117: "f32[][]cuda:0" = _foreach_sqrt[45] + getitem_2118: "f32[][]cuda:0" = _foreach_sqrt[46] + getitem_2119: "f32[][]cuda:0" = _foreach_sqrt[47] + getitem_2120: "f32[][]cuda:0" = _foreach_sqrt[48] + getitem_2121: "f32[][]cuda:0" = _foreach_sqrt[49] + getitem_2122: "f32[][]cuda:0" = _foreach_sqrt[50] + getitem_2123: "f32[][]cuda:0" = _foreach_sqrt[51] + getitem_2124: "f32[][]cuda:0" = _foreach_sqrt[52] + getitem_2125: "f32[][]cuda:0" = _foreach_sqrt[53] + getitem_2126: "f32[][]cuda:0" = _foreach_sqrt[54] + getitem_2127: "f32[][]cuda:0" = _foreach_sqrt[55] + getitem_2128: "f32[][]cuda:0" = _foreach_sqrt[56] + getitem_2129: "f32[][]cuda:0" = _foreach_sqrt[57] + getitem_2130: "f32[][]cuda:0" = _foreach_sqrt[58] + getitem_2131: "f32[][]cuda:0" = _foreach_sqrt[59] + getitem_2132: "f32[][]cuda:0" = _foreach_sqrt[60] + getitem_2133: "f32[][]cuda:0" = _foreach_sqrt[61] + getitem_2134: "f32[][]cuda:0" = _foreach_sqrt[62] + getitem_2135: "f32[][]cuda:0" = _foreach_sqrt[63] + getitem_2136: "f32[][]cuda:0" = _foreach_sqrt[64] + getitem_2137: "f32[][]cuda:0" = _foreach_sqrt[65] + getitem_2138: "f32[][]cuda:0" = _foreach_sqrt[66] + getitem_2139: "f32[][]cuda:0" = _foreach_sqrt[67] + getitem_2140: "f32[][]cuda:0" = _foreach_sqrt[68] + getitem_2141: "f32[][]cuda:0" = _foreach_sqrt[69] + getitem_2142: "f32[][]cuda:0" = _foreach_sqrt[70] + getitem_2143: "f32[][]cuda:0" = _foreach_sqrt[71] + getitem_2144: "f32[][]cuda:0" = _foreach_sqrt[72] + getitem_2145: "f32[][]cuda:0" = _foreach_sqrt[73] + getitem_2146: "f32[][]cuda:0" = _foreach_sqrt[74] + getitem_2147: "f32[][]cuda:0" = _foreach_sqrt[75] + getitem_2148: "f32[][]cuda:0" = _foreach_sqrt[76] + getitem_2149: "f32[][]cuda:0" = _foreach_sqrt[77] + getitem_2150: "f32[][]cuda:0" = _foreach_sqrt[78] + getitem_2151: "f32[][]cuda:0" = _foreach_sqrt[79] + getitem_2152: "f32[][]cuda:0" = _foreach_sqrt[80] + getitem_2153: "f32[][]cuda:0" = _foreach_sqrt[81] + getitem_2154: "f32[][]cuda:0" = _foreach_sqrt[82] + getitem_2155: "f32[][]cuda:0" = _foreach_sqrt[83] + getitem_2156: "f32[][]cuda:0" = _foreach_sqrt[84] + getitem_2157: "f32[][]cuda:0" = _foreach_sqrt[85] + getitem_2158: "f32[][]cuda:0" = _foreach_sqrt[86] + getitem_2159: "f32[][]cuda:0" = _foreach_sqrt[87] + getitem_2160: "f32[][]cuda:0" = _foreach_sqrt[88] + getitem_2161: "f32[][]cuda:0" = _foreach_sqrt[89] + getitem_2162: "f32[][]cuda:0" = _foreach_sqrt[90] + getitem_2163: "f32[][]cuda:0" = _foreach_sqrt[91] + getitem_2164: "f32[][]cuda:0" = _foreach_sqrt[92] + getitem_2165: "f32[][]cuda:0" = _foreach_sqrt[93] + getitem_2166: "f32[][]cuda:0" = _foreach_sqrt[94] + getitem_2167: "f32[][]cuda:0" = _foreach_sqrt[95] + getitem_2168: "f32[][]cuda:0" = _foreach_sqrt[96] + getitem_2169: "f32[][]cuda:0" = _foreach_sqrt[97] + getitem_2170: "f32[][]cuda:0" = _foreach_sqrt[98] + getitem_2171: "f32[][]cuda:0" = _foreach_sqrt[99] + getitem_2172: "f32[][]cuda:0" = _foreach_sqrt[100] + getitem_2173: "f32[][]cuda:0" = _foreach_sqrt[101] + getitem_2174: "f32[][]cuda:0" = _foreach_sqrt[102] + getitem_2175: "f32[][]cuda:0" = _foreach_sqrt[103] + getitem_2176: "f32[][]cuda:0" = _foreach_sqrt[104] + getitem_2177: 
"f32[][]cuda:0" = _foreach_sqrt[105] + getitem_2178: "f32[][]cuda:0" = _foreach_sqrt[106] + getitem_2179: "f32[][]cuda:0" = _foreach_sqrt[107] + getitem_2180: "f32[][]cuda:0" = _foreach_sqrt[108] + getitem_2181: "f32[][]cuda:0" = _foreach_sqrt[109] + getitem_2182: "f32[][]cuda:0" = _foreach_sqrt[110] + getitem_2183: "f32[][]cuda:0" = _foreach_sqrt[111] + getitem_2184: "f32[][]cuda:0" = _foreach_sqrt[112] + getitem_2185: "f32[][]cuda:0" = _foreach_sqrt[113] + getitem_2186: "f32[][]cuda:0" = _foreach_sqrt[114] + getitem_2187: "f32[][]cuda:0" = _foreach_sqrt[115] + getitem_2188: "f32[][]cuda:0" = _foreach_sqrt[116] + getitem_2189: "f32[][]cuda:0" = _foreach_sqrt[117] + getitem_2190: "f32[][]cuda:0" = _foreach_sqrt[118] + getitem_2191: "f32[][]cuda:0" = _foreach_sqrt[119] + getitem_2192: "f32[][]cuda:0" = _foreach_sqrt[120] + getitem_2193: "f32[][]cuda:0" = _foreach_sqrt[121] + getitem_2194: "f32[][]cuda:0" = _foreach_sqrt[122] + getitem_2195: "f32[][]cuda:0" = _foreach_sqrt[123] + getitem_2196: "f32[][]cuda:0" = _foreach_sqrt[124] + getitem_2197: "f32[][]cuda:0" = _foreach_sqrt[125] + getitem_2198: "f32[][]cuda:0" = _foreach_sqrt[126] + getitem_2199: "f32[][]cuda:0" = _foreach_sqrt[127] + getitem_2200: "f32[][]cuda:0" = _foreach_sqrt[128] + getitem_2201: "f32[][]cuda:0" = _foreach_sqrt[129] + getitem_2202: "f32[][]cuda:0" = _foreach_sqrt[130] + getitem_2203: "f32[][]cuda:0" = _foreach_sqrt[131] + getitem_2204: "f32[][]cuda:0" = _foreach_sqrt[132] + getitem_2205: "f32[][]cuda:0" = _foreach_sqrt[133] + getitem_2206: "f32[][]cuda:0" = _foreach_sqrt[134] + getitem_2207: "f32[][]cuda:0" = _foreach_sqrt[135] + getitem_2208: "f32[][]cuda:0" = _foreach_sqrt[136] + getitem_2209: "f32[][]cuda:0" = _foreach_sqrt[137] + getitem_2210: "f32[][]cuda:0" = _foreach_sqrt[138] + getitem_2211: "f32[][]cuda:0" = _foreach_sqrt[139] + getitem_2212: "f32[][]cuda:0" = _foreach_sqrt[140] + getitem_2213: "f32[][]cuda:0" = _foreach_sqrt[141] + getitem_2214: "f32[][]cuda:0" = _foreach_sqrt[142] + getitem_2215: "f32[][]cuda:0" = _foreach_sqrt[143] + getitem_2216: "f32[][]cuda:0" = _foreach_sqrt[144] + getitem_2217: "f32[][]cuda:0" = _foreach_sqrt[145] + getitem_2218: "f32[][]cuda:0" = _foreach_sqrt[146] + getitem_2219: "f32[][]cuda:0" = _foreach_sqrt[147]; _foreach_sqrt = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:582 in _multi_tensor_adam, code: exp_avg_sq_sqrt = torch._foreach_sqrt(device_exp_avg_sqs) + _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, 
getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035]) + getitem_2220: "f32[50304, 768][768, 1]cuda:0" = _foreach_sqrt_1[0] + getitem_2221: "f32[1024, 768][768, 1]cuda:0" = _foreach_sqrt_1[1] + getitem_2222: "f32[768][1]cuda:0" = _foreach_sqrt_1[2] + getitem_2223: "f32[768][1]cuda:0" = _foreach_sqrt_1[3] + getitem_2224: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[4] + getitem_2225: "f32[2304][1]cuda:0" = _foreach_sqrt_1[5] + getitem_2226: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[6] + getitem_2227: "f32[768][1]cuda:0" = _foreach_sqrt_1[7] + getitem_2228: "f32[768][1]cuda:0" = _foreach_sqrt_1[8] + getitem_2229: "f32[768][1]cuda:0" = _foreach_sqrt_1[9] + getitem_2230: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[10] + getitem_2231: "f32[3072][1]cuda:0" = _foreach_sqrt_1[11] + getitem_2232: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[12] + getitem_2233: "f32[768][1]cuda:0" = _foreach_sqrt_1[13] + getitem_2234: "f32[768][1]cuda:0" = _foreach_sqrt_1[14] + getitem_2235: "f32[768][1]cuda:0" = _foreach_sqrt_1[15] + getitem_2236: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[16] + getitem_2237: "f32[2304][1]cuda:0" = _foreach_sqrt_1[17] + getitem_2238: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[18] + getitem_2239: "f32[768][1]cuda:0" = _foreach_sqrt_1[19] + getitem_2240: "f32[768][1]cuda:0" = _foreach_sqrt_1[20] + getitem_2241: "f32[768][1]cuda:0" = _foreach_sqrt_1[21] + getitem_2242: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[22] + getitem_2243: "f32[3072][1]cuda:0" = _foreach_sqrt_1[23] + getitem_2244: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[24] + getitem_2245: "f32[768][1]cuda:0" = _foreach_sqrt_1[25] + getitem_2246: "f32[768][1]cuda:0" = _foreach_sqrt_1[26] + getitem_2247: "f32[768][1]cuda:0" = _foreach_sqrt_1[27] + getitem_2248: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[28] + getitem_2249: "f32[2304][1]cuda:0" = _foreach_sqrt_1[29] + getitem_2250: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[30] + getitem_2251: "f32[768][1]cuda:0" = _foreach_sqrt_1[31] + getitem_2252: "f32[768][1]cuda:0" = _foreach_sqrt_1[32] + getitem_2253: "f32[768][1]cuda:0" = _foreach_sqrt_1[33] + getitem_2254: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[34] + getitem_2255: "f32[3072][1]cuda:0" = _foreach_sqrt_1[35] + getitem_2256: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[36] + getitem_2257: "f32[768][1]cuda:0" = _foreach_sqrt_1[37] + getitem_2258: "f32[768][1]cuda:0" = _foreach_sqrt_1[38] + getitem_2259: "f32[768][1]cuda:0" = _foreach_sqrt_1[39] + getitem_2260: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[40] + getitem_2261: "f32[2304][1]cuda:0" = _foreach_sqrt_1[41] + getitem_2262: "f32[768, 768][768, 1]cuda:0" = 
_foreach_sqrt_1[42] + getitem_2263: "f32[768][1]cuda:0" = _foreach_sqrt_1[43] + getitem_2264: "f32[768][1]cuda:0" = _foreach_sqrt_1[44] + getitem_2265: "f32[768][1]cuda:0" = _foreach_sqrt_1[45] + getitem_2266: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[46] + getitem_2267: "f32[3072][1]cuda:0" = _foreach_sqrt_1[47] + getitem_2268: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[48] + getitem_2269: "f32[768][1]cuda:0" = _foreach_sqrt_1[49] + getitem_2270: "f32[768][1]cuda:0" = _foreach_sqrt_1[50] + getitem_2271: "f32[768][1]cuda:0" = _foreach_sqrt_1[51] + getitem_2272: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[52] + getitem_2273: "f32[2304][1]cuda:0" = _foreach_sqrt_1[53] + getitem_2274: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[54] + getitem_2275: "f32[768][1]cuda:0" = _foreach_sqrt_1[55] + getitem_2276: "f32[768][1]cuda:0" = _foreach_sqrt_1[56] + getitem_2277: "f32[768][1]cuda:0" = _foreach_sqrt_1[57] + getitem_2278: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[58] + getitem_2279: "f32[3072][1]cuda:0" = _foreach_sqrt_1[59] + getitem_2280: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[60] + getitem_2281: "f32[768][1]cuda:0" = _foreach_sqrt_1[61] + getitem_2282: "f32[768][1]cuda:0" = _foreach_sqrt_1[62] + getitem_2283: "f32[768][1]cuda:0" = _foreach_sqrt_1[63] + getitem_2284: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[64] + getitem_2285: "f32[2304][1]cuda:0" = _foreach_sqrt_1[65] + getitem_2286: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[66] + getitem_2287: "f32[768][1]cuda:0" = _foreach_sqrt_1[67] + getitem_2288: "f32[768][1]cuda:0" = _foreach_sqrt_1[68] + getitem_2289: "f32[768][1]cuda:0" = _foreach_sqrt_1[69] + getitem_2290: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[70] + getitem_2291: "f32[3072][1]cuda:0" = _foreach_sqrt_1[71] + getitem_2292: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[72] + getitem_2293: "f32[768][1]cuda:0" = _foreach_sqrt_1[73] + getitem_2294: "f32[768][1]cuda:0" = _foreach_sqrt_1[74] + getitem_2295: "f32[768][1]cuda:0" = _foreach_sqrt_1[75] + getitem_2296: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[76] + getitem_2297: "f32[2304][1]cuda:0" = _foreach_sqrt_1[77] + getitem_2298: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[78] + getitem_2299: "f32[768][1]cuda:0" = _foreach_sqrt_1[79] + getitem_2300: "f32[768][1]cuda:0" = _foreach_sqrt_1[80] + getitem_2301: "f32[768][1]cuda:0" = _foreach_sqrt_1[81] + getitem_2302: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[82] + getitem_2303: "f32[3072][1]cuda:0" = _foreach_sqrt_1[83] + getitem_2304: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[84] + getitem_2305: "f32[768][1]cuda:0" = _foreach_sqrt_1[85] + getitem_2306: "f32[768][1]cuda:0" = _foreach_sqrt_1[86] + getitem_2307: "f32[768][1]cuda:0" = _foreach_sqrt_1[87] + getitem_2308: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[88] + getitem_2309: "f32[2304][1]cuda:0" = _foreach_sqrt_1[89] + getitem_2310: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[90] + getitem_2311: "f32[768][1]cuda:0" = _foreach_sqrt_1[91] + getitem_2312: "f32[768][1]cuda:0" = _foreach_sqrt_1[92] + getitem_2313: "f32[768][1]cuda:0" = _foreach_sqrt_1[93] + getitem_2314: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[94] + getitem_2315: "f32[3072][1]cuda:0" = _foreach_sqrt_1[95] + getitem_2316: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[96] + getitem_2317: "f32[768][1]cuda:0" = _foreach_sqrt_1[97] + getitem_2318: "f32[768][1]cuda:0" = _foreach_sqrt_1[98] + getitem_2319: "f32[768][1]cuda:0" = _foreach_sqrt_1[99] + getitem_2320: 
"f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[100] + getitem_2321: "f32[2304][1]cuda:0" = _foreach_sqrt_1[101] + getitem_2322: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[102] + getitem_2323: "f32[768][1]cuda:0" = _foreach_sqrt_1[103] + getitem_2324: "f32[768][1]cuda:0" = _foreach_sqrt_1[104] + getitem_2325: "f32[768][1]cuda:0" = _foreach_sqrt_1[105] + getitem_2326: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[106] + getitem_2327: "f32[3072][1]cuda:0" = _foreach_sqrt_1[107] + getitem_2328: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[108] + getitem_2329: "f32[768][1]cuda:0" = _foreach_sqrt_1[109] + getitem_2330: "f32[768][1]cuda:0" = _foreach_sqrt_1[110] + getitem_2331: "f32[768][1]cuda:0" = _foreach_sqrt_1[111] + getitem_2332: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[112] + getitem_2333: "f32[2304][1]cuda:0" = _foreach_sqrt_1[113] + getitem_2334: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[114] + getitem_2335: "f32[768][1]cuda:0" = _foreach_sqrt_1[115] + getitem_2336: "f32[768][1]cuda:0" = _foreach_sqrt_1[116] + getitem_2337: "f32[768][1]cuda:0" = _foreach_sqrt_1[117] + getitem_2338: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[118] + getitem_2339: "f32[3072][1]cuda:0" = _foreach_sqrt_1[119] + getitem_2340: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[120] + getitem_2341: "f32[768][1]cuda:0" = _foreach_sqrt_1[121] + getitem_2342: "f32[768][1]cuda:0" = _foreach_sqrt_1[122] + getitem_2343: "f32[768][1]cuda:0" = _foreach_sqrt_1[123] + getitem_2344: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[124] + getitem_2345: "f32[2304][1]cuda:0" = _foreach_sqrt_1[125] + getitem_2346: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[126] + getitem_2347: "f32[768][1]cuda:0" = _foreach_sqrt_1[127] + getitem_2348: "f32[768][1]cuda:0" = _foreach_sqrt_1[128] + getitem_2349: "f32[768][1]cuda:0" = _foreach_sqrt_1[129] + getitem_2350: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[130] + getitem_2351: "f32[3072][1]cuda:0" = _foreach_sqrt_1[131] + getitem_2352: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[132] + getitem_2353: "f32[768][1]cuda:0" = _foreach_sqrt_1[133] + getitem_2354: "f32[768][1]cuda:0" = _foreach_sqrt_1[134] + getitem_2355: "f32[768][1]cuda:0" = _foreach_sqrt_1[135] + getitem_2356: "f32[2304, 768][768, 1]cuda:0" = _foreach_sqrt_1[136] + getitem_2357: "f32[2304][1]cuda:0" = _foreach_sqrt_1[137] + getitem_2358: "f32[768, 768][768, 1]cuda:0" = _foreach_sqrt_1[138] + getitem_2359: "f32[768][1]cuda:0" = _foreach_sqrt_1[139] + getitem_2360: "f32[768][1]cuda:0" = _foreach_sqrt_1[140] + getitem_2361: "f32[768][1]cuda:0" = _foreach_sqrt_1[141] + getitem_2362: "f32[3072, 768][768, 1]cuda:0" = _foreach_sqrt_1[142] + getitem_2363: "f32[3072][1]cuda:0" = _foreach_sqrt_1[143] + getitem_2364: "f32[768, 3072][3072, 1]cuda:0" = _foreach_sqrt_1[144] + getitem_2365: "f32[768][1]cuda:0" = _foreach_sqrt_1[145] + getitem_2366: "f32[768][1]cuda:0" = _foreach_sqrt_1[146] + getitem_2367: "f32[768][1]cuda:0" = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:584 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt) + _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, 
getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, 
getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = 
getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None + getitem_2368: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_1[0] + getitem_2369: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_1[1] + getitem_2370: "f32[768][1]cuda:0" = _foreach_div_1[2] + getitem_2371: "f32[768][1]cuda:0" = _foreach_div_1[3] + getitem_2372: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[4] + getitem_2373: "f32[2304][1]cuda:0" = _foreach_div_1[5] + getitem_2374: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[6] + getitem_2375: "f32[768][1]cuda:0" = _foreach_div_1[7] + getitem_2376: "f32[768][1]cuda:0" = _foreach_div_1[8] + getitem_2377: "f32[768][1]cuda:0" = _foreach_div_1[9] + getitem_2378: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[10] + getitem_2379: "f32[3072][1]cuda:0" = _foreach_div_1[11] + getitem_2380: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[12] + getitem_2381: "f32[768][1]cuda:0" = _foreach_div_1[13] + getitem_2382: "f32[768][1]cuda:0" = _foreach_div_1[14] + getitem_2383: "f32[768][1]cuda:0" = _foreach_div_1[15] + getitem_2384: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[16] + getitem_2385: "f32[2304][1]cuda:0" = _foreach_div_1[17] + getitem_2386: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[18] + getitem_2387: "f32[768][1]cuda:0" = _foreach_div_1[19] + getitem_2388: "f32[768][1]cuda:0" = _foreach_div_1[20] + getitem_2389: "f32[768][1]cuda:0" = _foreach_div_1[21] + getitem_2390: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[22] + getitem_2391: "f32[3072][1]cuda:0" = _foreach_div_1[23] + getitem_2392: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[24] + getitem_2393: "f32[768][1]cuda:0" = _foreach_div_1[25] + getitem_2394: "f32[768][1]cuda:0" = _foreach_div_1[26] + getitem_2395: "f32[768][1]cuda:0" = _foreach_div_1[27] + getitem_2396: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[28] + getitem_2397: "f32[2304][1]cuda:0" = _foreach_div_1[29] + getitem_2398: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[30] + getitem_2399: "f32[768][1]cuda:0" = _foreach_div_1[31] + getitem_2400: "f32[768][1]cuda:0" = _foreach_div_1[32] + getitem_2401: "f32[768][1]cuda:0" = _foreach_div_1[33] + getitem_2402: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[34] + getitem_2403: "f32[3072][1]cuda:0" = _foreach_div_1[35] + getitem_2404: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[36] + getitem_2405: "f32[768][1]cuda:0" = _foreach_div_1[37] + getitem_2406: "f32[768][1]cuda:0" = _foreach_div_1[38] + 
getitem_2407: "f32[768][1]cuda:0" = _foreach_div_1[39] + getitem_2408: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[40] + getitem_2409: "f32[2304][1]cuda:0" = _foreach_div_1[41] + getitem_2410: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[42] + getitem_2411: "f32[768][1]cuda:0" = _foreach_div_1[43] + getitem_2412: "f32[768][1]cuda:0" = _foreach_div_1[44] + getitem_2413: "f32[768][1]cuda:0" = _foreach_div_1[45] + getitem_2414: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[46] + getitem_2415: "f32[3072][1]cuda:0" = _foreach_div_1[47] + getitem_2416: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[48] + getitem_2417: "f32[768][1]cuda:0" = _foreach_div_1[49] + getitem_2418: "f32[768][1]cuda:0" = _foreach_div_1[50] + getitem_2419: "f32[768][1]cuda:0" = _foreach_div_1[51] + getitem_2420: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[52] + getitem_2421: "f32[2304][1]cuda:0" = _foreach_div_1[53] + getitem_2422: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[54] + getitem_2423: "f32[768][1]cuda:0" = _foreach_div_1[55] + getitem_2424: "f32[768][1]cuda:0" = _foreach_div_1[56] + getitem_2425: "f32[768][1]cuda:0" = _foreach_div_1[57] + getitem_2426: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[58] + getitem_2427: "f32[3072][1]cuda:0" = _foreach_div_1[59] + getitem_2428: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[60] + getitem_2429: "f32[768][1]cuda:0" = _foreach_div_1[61] + getitem_2430: "f32[768][1]cuda:0" = _foreach_div_1[62] + getitem_2431: "f32[768][1]cuda:0" = _foreach_div_1[63] + getitem_2432: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[64] + getitem_2433: "f32[2304][1]cuda:0" = _foreach_div_1[65] + getitem_2434: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[66] + getitem_2435: "f32[768][1]cuda:0" = _foreach_div_1[67] + getitem_2436: "f32[768][1]cuda:0" = _foreach_div_1[68] + getitem_2437: "f32[768][1]cuda:0" = _foreach_div_1[69] + getitem_2438: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[70] + getitem_2439: "f32[3072][1]cuda:0" = _foreach_div_1[71] + getitem_2440: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[72] + getitem_2441: "f32[768][1]cuda:0" = _foreach_div_1[73] + getitem_2442: "f32[768][1]cuda:0" = _foreach_div_1[74] + getitem_2443: "f32[768][1]cuda:0" = _foreach_div_1[75] + getitem_2444: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[76] + getitem_2445: "f32[2304][1]cuda:0" = _foreach_div_1[77] + getitem_2446: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[78] + getitem_2447: "f32[768][1]cuda:0" = _foreach_div_1[79] + getitem_2448: "f32[768][1]cuda:0" = _foreach_div_1[80] + getitem_2449: "f32[768][1]cuda:0" = _foreach_div_1[81] + getitem_2450: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[82] + getitem_2451: "f32[3072][1]cuda:0" = _foreach_div_1[83] + getitem_2452: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[84] + getitem_2453: "f32[768][1]cuda:0" = _foreach_div_1[85] + getitem_2454: "f32[768][1]cuda:0" = _foreach_div_1[86] + getitem_2455: "f32[768][1]cuda:0" = _foreach_div_1[87] + getitem_2456: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[88] + getitem_2457: "f32[2304][1]cuda:0" = _foreach_div_1[89] + getitem_2458: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[90] + getitem_2459: "f32[768][1]cuda:0" = _foreach_div_1[91] + getitem_2460: "f32[768][1]cuda:0" = _foreach_div_1[92] + getitem_2461: "f32[768][1]cuda:0" = _foreach_div_1[93] + getitem_2462: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[94] + getitem_2463: "f32[3072][1]cuda:0" = _foreach_div_1[95] + getitem_2464: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[96] + getitem_2465: 
"f32[768][1]cuda:0" = _foreach_div_1[97] + getitem_2466: "f32[768][1]cuda:0" = _foreach_div_1[98] + getitem_2467: "f32[768][1]cuda:0" = _foreach_div_1[99] + getitem_2468: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[100] + getitem_2469: "f32[2304][1]cuda:0" = _foreach_div_1[101] + getitem_2470: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[102] + getitem_2471: "f32[768][1]cuda:0" = _foreach_div_1[103] + getitem_2472: "f32[768][1]cuda:0" = _foreach_div_1[104] + getitem_2473: "f32[768][1]cuda:0" = _foreach_div_1[105] + getitem_2474: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[106] + getitem_2475: "f32[3072][1]cuda:0" = _foreach_div_1[107] + getitem_2476: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[108] + getitem_2477: "f32[768][1]cuda:0" = _foreach_div_1[109] + getitem_2478: "f32[768][1]cuda:0" = _foreach_div_1[110] + getitem_2479: "f32[768][1]cuda:0" = _foreach_div_1[111] + getitem_2480: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[112] + getitem_2481: "f32[2304][1]cuda:0" = _foreach_div_1[113] + getitem_2482: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[114] + getitem_2483: "f32[768][1]cuda:0" = _foreach_div_1[115] + getitem_2484: "f32[768][1]cuda:0" = _foreach_div_1[116] + getitem_2485: "f32[768][1]cuda:0" = _foreach_div_1[117] + getitem_2486: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[118] + getitem_2487: "f32[3072][1]cuda:0" = _foreach_div_1[119] + getitem_2488: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[120] + getitem_2489: "f32[768][1]cuda:0" = _foreach_div_1[121] + getitem_2490: "f32[768][1]cuda:0" = _foreach_div_1[122] + getitem_2491: "f32[768][1]cuda:0" = _foreach_div_1[123] + getitem_2492: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[124] + getitem_2493: "f32[2304][1]cuda:0" = _foreach_div_1[125] + getitem_2494: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[126] + getitem_2495: "f32[768][1]cuda:0" = _foreach_div_1[127] + getitem_2496: "f32[768][1]cuda:0" = _foreach_div_1[128] + getitem_2497: "f32[768][1]cuda:0" = _foreach_div_1[129] + getitem_2498: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[130] + getitem_2499: "f32[3072][1]cuda:0" = _foreach_div_1[131] + getitem_2500: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[132] + getitem_2501: "f32[768][1]cuda:0" = _foreach_div_1[133] + getitem_2502: "f32[768][1]cuda:0" = _foreach_div_1[134] + getitem_2503: "f32[768][1]cuda:0" = _foreach_div_1[135] + getitem_2504: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_1[136] + getitem_2505: "f32[2304][1]cuda:0" = _foreach_div_1[137] + getitem_2506: "f32[768, 768][768, 1]cuda:0" = _foreach_div_1[138] + getitem_2507: "f32[768][1]cuda:0" = _foreach_div_1[139] + getitem_2508: "f32[768][1]cuda:0" = _foreach_div_1[140] + getitem_2509: "f32[768][1]cuda:0" = _foreach_div_1[141] + getitem_2510: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_1[142] + getitem_2511: "f32[3072][1]cuda:0" = _foreach_div_1[143] + getitem_2512: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_1[144] + getitem_2513: "f32[768][1]cuda:0" = _foreach_div_1[145] + getitem_2514: "f32[768][1]cuda:0" = _foreach_div_1[146] + getitem_2515: "f32[768][1]cuda:0" = _foreach_div_1[147]; _foreach_div_1 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:585 in _multi_tensor_adam, code: torch._foreach_add_(exp_avg_sq_sqrt, eps) + _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, 
getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = 
getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None + getitem_2516: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_3[0] + getitem_2517: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_3[1] + getitem_2518: "f32[768][1]cuda:0" = _foreach_add_3[2] + getitem_2519: "f32[768][1]cuda:0" = _foreach_add_3[3] + getitem_2520: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[4] + getitem_2521: "f32[2304][1]cuda:0" = _foreach_add_3[5] + getitem_2522: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[6] + getitem_2523: "f32[768][1]cuda:0" = _foreach_add_3[7] + getitem_2524: "f32[768][1]cuda:0" = _foreach_add_3[8] + getitem_2525: "f32[768][1]cuda:0" = _foreach_add_3[9] + getitem_2526: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[10] + getitem_2527: "f32[3072][1]cuda:0" = _foreach_add_3[11] + getitem_2528: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[12] + getitem_2529: "f32[768][1]cuda:0" = _foreach_add_3[13] + getitem_2530: "f32[768][1]cuda:0" = _foreach_add_3[14] + getitem_2531: "f32[768][1]cuda:0" = _foreach_add_3[15] + getitem_2532: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[16] + getitem_2533: "f32[2304][1]cuda:0" = _foreach_add_3[17] + getitem_2534: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[18] + getitem_2535: "f32[768][1]cuda:0" = _foreach_add_3[19] + getitem_2536: "f32[768][1]cuda:0" = _foreach_add_3[20] + getitem_2537: "f32[768][1]cuda:0" = _foreach_add_3[21] + getitem_2538: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[22] + getitem_2539: "f32[3072][1]cuda:0" = _foreach_add_3[23] + getitem_2540: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[24] + getitem_2541: "f32[768][1]cuda:0" = _foreach_add_3[25] + getitem_2542: "f32[768][1]cuda:0" = _foreach_add_3[26] + getitem_2543: "f32[768][1]cuda:0" = _foreach_add_3[27] + getitem_2544: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[28] + getitem_2545: "f32[2304][1]cuda:0" = _foreach_add_3[29] + getitem_2546: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[30] + getitem_2547: "f32[768][1]cuda:0" = _foreach_add_3[31] + getitem_2548: "f32[768][1]cuda:0" = _foreach_add_3[32] + getitem_2549: "f32[768][1]cuda:0" = _foreach_add_3[33] + getitem_2550: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[34] + getitem_2551: "f32[3072][1]cuda:0" = _foreach_add_3[35] + getitem_2552: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[36] + getitem_2553: "f32[768][1]cuda:0" = _foreach_add_3[37] + getitem_2554: "f32[768][1]cuda:0" = _foreach_add_3[38] + getitem_2555: "f32[768][1]cuda:0" = _foreach_add_3[39] + getitem_2556: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[40] + getitem_2557: "f32[2304][1]cuda:0" = _foreach_add_3[41] + getitem_2558: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[42] + getitem_2559: "f32[768][1]cuda:0" = _foreach_add_3[43] + getitem_2560: "f32[768][1]cuda:0" = _foreach_add_3[44] + getitem_2561: "f32[768][1]cuda:0" = _foreach_add_3[45] + getitem_2562: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[46] + getitem_2563: "f32[3072][1]cuda:0" = _foreach_add_3[47] + getitem_2564: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[48] + getitem_2565: 
"f32[768][1]cuda:0" = _foreach_add_3[49] + getitem_2566: "f32[768][1]cuda:0" = _foreach_add_3[50] + getitem_2567: "f32[768][1]cuda:0" = _foreach_add_3[51] + getitem_2568: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[52] + getitem_2569: "f32[2304][1]cuda:0" = _foreach_add_3[53] + getitem_2570: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[54] + getitem_2571: "f32[768][1]cuda:0" = _foreach_add_3[55] + getitem_2572: "f32[768][1]cuda:0" = _foreach_add_3[56] + getitem_2573: "f32[768][1]cuda:0" = _foreach_add_3[57] + getitem_2574: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[58] + getitem_2575: "f32[3072][1]cuda:0" = _foreach_add_3[59] + getitem_2576: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[60] + getitem_2577: "f32[768][1]cuda:0" = _foreach_add_3[61] + getitem_2578: "f32[768][1]cuda:0" = _foreach_add_3[62] + getitem_2579: "f32[768][1]cuda:0" = _foreach_add_3[63] + getitem_2580: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[64] + getitem_2581: "f32[2304][1]cuda:0" = _foreach_add_3[65] + getitem_2582: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[66] + getitem_2583: "f32[768][1]cuda:0" = _foreach_add_3[67] + getitem_2584: "f32[768][1]cuda:0" = _foreach_add_3[68] + getitem_2585: "f32[768][1]cuda:0" = _foreach_add_3[69] + getitem_2586: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[70] + getitem_2587: "f32[3072][1]cuda:0" = _foreach_add_3[71] + getitem_2588: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[72] + getitem_2589: "f32[768][1]cuda:0" = _foreach_add_3[73] + getitem_2590: "f32[768][1]cuda:0" = _foreach_add_3[74] + getitem_2591: "f32[768][1]cuda:0" = _foreach_add_3[75] + getitem_2592: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[76] + getitem_2593: "f32[2304][1]cuda:0" = _foreach_add_3[77] + getitem_2594: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[78] + getitem_2595: "f32[768][1]cuda:0" = _foreach_add_3[79] + getitem_2596: "f32[768][1]cuda:0" = _foreach_add_3[80] + getitem_2597: "f32[768][1]cuda:0" = _foreach_add_3[81] + getitem_2598: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[82] + getitem_2599: "f32[3072][1]cuda:0" = _foreach_add_3[83] + getitem_2600: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[84] + getitem_2601: "f32[768][1]cuda:0" = _foreach_add_3[85] + getitem_2602: "f32[768][1]cuda:0" = _foreach_add_3[86] + getitem_2603: "f32[768][1]cuda:0" = _foreach_add_3[87] + getitem_2604: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[88] + getitem_2605: "f32[2304][1]cuda:0" = _foreach_add_3[89] + getitem_2606: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[90] + getitem_2607: "f32[768][1]cuda:0" = _foreach_add_3[91] + getitem_2608: "f32[768][1]cuda:0" = _foreach_add_3[92] + getitem_2609: "f32[768][1]cuda:0" = _foreach_add_3[93] + getitem_2610: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[94] + getitem_2611: "f32[3072][1]cuda:0" = _foreach_add_3[95] + getitem_2612: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[96] + getitem_2613: "f32[768][1]cuda:0" = _foreach_add_3[97] + getitem_2614: "f32[768][1]cuda:0" = _foreach_add_3[98] + getitem_2615: "f32[768][1]cuda:0" = _foreach_add_3[99] + getitem_2616: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[100] + getitem_2617: "f32[2304][1]cuda:0" = _foreach_add_3[101] + getitem_2618: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[102] + getitem_2619: "f32[768][1]cuda:0" = _foreach_add_3[103] + getitem_2620: "f32[768][1]cuda:0" = _foreach_add_3[104] + getitem_2621: "f32[768][1]cuda:0" = _foreach_add_3[105] + getitem_2622: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[106] + getitem_2623: "f32[3072][1]cuda:0" = 
_foreach_add_3[107] + getitem_2624: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[108] + getitem_2625: "f32[768][1]cuda:0" = _foreach_add_3[109] + getitem_2626: "f32[768][1]cuda:0" = _foreach_add_3[110] + getitem_2627: "f32[768][1]cuda:0" = _foreach_add_3[111] + getitem_2628: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[112] + getitem_2629: "f32[2304][1]cuda:0" = _foreach_add_3[113] + getitem_2630: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[114] + getitem_2631: "f32[768][1]cuda:0" = _foreach_add_3[115] + getitem_2632: "f32[768][1]cuda:0" = _foreach_add_3[116] + getitem_2633: "f32[768][1]cuda:0" = _foreach_add_3[117] + getitem_2634: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[118] + getitem_2635: "f32[3072][1]cuda:0" = _foreach_add_3[119] + getitem_2636: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[120] + getitem_2637: "f32[768][1]cuda:0" = _foreach_add_3[121] + getitem_2638: "f32[768][1]cuda:0" = _foreach_add_3[122] + getitem_2639: "f32[768][1]cuda:0" = _foreach_add_3[123] + getitem_2640: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[124] + getitem_2641: "f32[2304][1]cuda:0" = _foreach_add_3[125] + getitem_2642: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[126] + getitem_2643: "f32[768][1]cuda:0" = _foreach_add_3[127] + getitem_2644: "f32[768][1]cuda:0" = _foreach_add_3[128] + getitem_2645: "f32[768][1]cuda:0" = _foreach_add_3[129] + getitem_2646: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[130] + getitem_2647: "f32[3072][1]cuda:0" = _foreach_add_3[131] + getitem_2648: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[132] + getitem_2649: "f32[768][1]cuda:0" = _foreach_add_3[133] + getitem_2650: "f32[768][1]cuda:0" = _foreach_add_3[134] + getitem_2651: "f32[768][1]cuda:0" = _foreach_add_3[135] + getitem_2652: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_3[136] + getitem_2653: "f32[2304][1]cuda:0" = _foreach_add_3[137] + getitem_2654: "f32[768, 768][768, 1]cuda:0" = _foreach_add_3[138] + getitem_2655: "f32[768][1]cuda:0" = _foreach_add_3[139] + getitem_2656: "f32[768][1]cuda:0" = _foreach_add_3[140] + getitem_2657: "f32[768][1]cuda:0" = _foreach_add_3[141] + getitem_2658: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_3[142] + getitem_2659: "f32[3072][1]cuda:0" = _foreach_add_3[143] + getitem_2660: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_3[144] + getitem_2661: "f32[768][1]cuda:0" = _foreach_add_3[145] + getitem_2662: "f32[768][1]cuda:0" = _foreach_add_3[146] + getitem_2663: "f32[768][1]cuda:0" = _foreach_add_3[147]; _foreach_add_3 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:586 in _multi_tensor_adam, code: torch._foreach_div_(exp_avg_sq_sqrt, step_size) + _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, 
getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = 
getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = 
getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None + getitem_2664: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_2[0] + getitem_2665: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_2[1] + getitem_2666: "f32[768][1]cuda:0" = _foreach_div_2[2] + getitem_2667: "f32[768][1]cuda:0" = _foreach_div_2[3] + getitem_2668: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[4] + getitem_2669: "f32[2304][1]cuda:0" = _foreach_div_2[5] + getitem_2670: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[6] + getitem_2671: "f32[768][1]cuda:0" = _foreach_div_2[7] + getitem_2672: "f32[768][1]cuda:0" = _foreach_div_2[8] + getitem_2673: "f32[768][1]cuda:0" = _foreach_div_2[9] + getitem_2674: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[10] + getitem_2675: "f32[3072][1]cuda:0" = _foreach_div_2[11] + getitem_2676: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[12] + getitem_2677: "f32[768][1]cuda:0" = _foreach_div_2[13] + getitem_2678: "f32[768][1]cuda:0" = _foreach_div_2[14] + getitem_2679: "f32[768][1]cuda:0" = _foreach_div_2[15] + getitem_2680: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[16] + getitem_2681: "f32[2304][1]cuda:0" = _foreach_div_2[17] + getitem_2682: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[18] + getitem_2683: "f32[768][1]cuda:0" = _foreach_div_2[19] + getitem_2684: "f32[768][1]cuda:0" = _foreach_div_2[20] + getitem_2685: "f32[768][1]cuda:0" = _foreach_div_2[21] + getitem_2686: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[22] + getitem_2687: "f32[3072][1]cuda:0" = _foreach_div_2[23] + getitem_2688: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[24] + getitem_2689: "f32[768][1]cuda:0" = _foreach_div_2[25] + getitem_2690: "f32[768][1]cuda:0" = _foreach_div_2[26] + getitem_2691: "f32[768][1]cuda:0" = _foreach_div_2[27] + getitem_2692: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[28] + getitem_2693: "f32[2304][1]cuda:0" = _foreach_div_2[29] + getitem_2694: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[30] + getitem_2695: "f32[768][1]cuda:0" = _foreach_div_2[31] + getitem_2696: "f32[768][1]cuda:0" = _foreach_div_2[32] + getitem_2697: "f32[768][1]cuda:0" = _foreach_div_2[33] + getitem_2698: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[34] + getitem_2699: "f32[3072][1]cuda:0" = _foreach_div_2[35] + getitem_2700: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[36] + getitem_2701: "f32[768][1]cuda:0" = _foreach_div_2[37] + getitem_2702: "f32[768][1]cuda:0" = _foreach_div_2[38] + getitem_2703: "f32[768][1]cuda:0" = _foreach_div_2[39] + getitem_2704: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[40] + getitem_2705: "f32[2304][1]cuda:0" = _foreach_div_2[41] + getitem_2706: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[42] + getitem_2707: "f32[768][1]cuda:0" = _foreach_div_2[43] + getitem_2708: "f32[768][1]cuda:0" = _foreach_div_2[44] + getitem_2709: "f32[768][1]cuda:0" = _foreach_div_2[45] + getitem_2710: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[46] + getitem_2711: "f32[3072][1]cuda:0" = 
_foreach_div_2[47] + getitem_2712: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[48] + getitem_2713: "f32[768][1]cuda:0" = _foreach_div_2[49] + getitem_2714: "f32[768][1]cuda:0" = _foreach_div_2[50] + getitem_2715: "f32[768][1]cuda:0" = _foreach_div_2[51] + getitem_2716: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[52] + getitem_2717: "f32[2304][1]cuda:0" = _foreach_div_2[53] + getitem_2718: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[54] + getitem_2719: "f32[768][1]cuda:0" = _foreach_div_2[55] + getitem_2720: "f32[768][1]cuda:0" = _foreach_div_2[56] + getitem_2721: "f32[768][1]cuda:0" = _foreach_div_2[57] + getitem_2722: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[58] + getitem_2723: "f32[3072][1]cuda:0" = _foreach_div_2[59] + getitem_2724: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[60] + getitem_2725: "f32[768][1]cuda:0" = _foreach_div_2[61] + getitem_2726: "f32[768][1]cuda:0" = _foreach_div_2[62] + getitem_2727: "f32[768][1]cuda:0" = _foreach_div_2[63] + getitem_2728: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[64] + getitem_2729: "f32[2304][1]cuda:0" = _foreach_div_2[65] + getitem_2730: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[66] + getitem_2731: "f32[768][1]cuda:0" = _foreach_div_2[67] + getitem_2732: "f32[768][1]cuda:0" = _foreach_div_2[68] + getitem_2733: "f32[768][1]cuda:0" = _foreach_div_2[69] + getitem_2734: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[70] + getitem_2735: "f32[3072][1]cuda:0" = _foreach_div_2[71] + getitem_2736: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[72] + getitem_2737: "f32[768][1]cuda:0" = _foreach_div_2[73] + getitem_2738: "f32[768][1]cuda:0" = _foreach_div_2[74] + getitem_2739: "f32[768][1]cuda:0" = _foreach_div_2[75] + getitem_2740: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[76] + getitem_2741: "f32[2304][1]cuda:0" = _foreach_div_2[77] + getitem_2742: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[78] + getitem_2743: "f32[768][1]cuda:0" = _foreach_div_2[79] + getitem_2744: "f32[768][1]cuda:0" = _foreach_div_2[80] + getitem_2745: "f32[768][1]cuda:0" = _foreach_div_2[81] + getitem_2746: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[82] + getitem_2747: "f32[3072][1]cuda:0" = _foreach_div_2[83] + getitem_2748: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[84] + getitem_2749: "f32[768][1]cuda:0" = _foreach_div_2[85] + getitem_2750: "f32[768][1]cuda:0" = _foreach_div_2[86] + getitem_2751: "f32[768][1]cuda:0" = _foreach_div_2[87] + getitem_2752: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[88] + getitem_2753: "f32[2304][1]cuda:0" = _foreach_div_2[89] + getitem_2754: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[90] + getitem_2755: "f32[768][1]cuda:0" = _foreach_div_2[91] + getitem_2756: "f32[768][1]cuda:0" = _foreach_div_2[92] + getitem_2757: "f32[768][1]cuda:0" = _foreach_div_2[93] + getitem_2758: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[94] + getitem_2759: "f32[3072][1]cuda:0" = _foreach_div_2[95] + getitem_2760: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[96] + getitem_2761: "f32[768][1]cuda:0" = _foreach_div_2[97] + getitem_2762: "f32[768][1]cuda:0" = _foreach_div_2[98] + getitem_2763: "f32[768][1]cuda:0" = _foreach_div_2[99] + getitem_2764: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[100] + getitem_2765: "f32[2304][1]cuda:0" = _foreach_div_2[101] + getitem_2766: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[102] + getitem_2767: "f32[768][1]cuda:0" = _foreach_div_2[103] + getitem_2768: "f32[768][1]cuda:0" = _foreach_div_2[104] + getitem_2769: "f32[768][1]cuda:0" = _foreach_div_2[105] + 
getitem_2770: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[106] + getitem_2771: "f32[3072][1]cuda:0" = _foreach_div_2[107] + getitem_2772: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[108] + getitem_2773: "f32[768][1]cuda:0" = _foreach_div_2[109] + getitem_2774: "f32[768][1]cuda:0" = _foreach_div_2[110] + getitem_2775: "f32[768][1]cuda:0" = _foreach_div_2[111] + getitem_2776: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[112] + getitem_2777: "f32[2304][1]cuda:0" = _foreach_div_2[113] + getitem_2778: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[114] + getitem_2779: "f32[768][1]cuda:0" = _foreach_div_2[115] + getitem_2780: "f32[768][1]cuda:0" = _foreach_div_2[116] + getitem_2781: "f32[768][1]cuda:0" = _foreach_div_2[117] + getitem_2782: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[118] + getitem_2783: "f32[3072][1]cuda:0" = _foreach_div_2[119] + getitem_2784: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[120] + getitem_2785: "f32[768][1]cuda:0" = _foreach_div_2[121] + getitem_2786: "f32[768][1]cuda:0" = _foreach_div_2[122] + getitem_2787: "f32[768][1]cuda:0" = _foreach_div_2[123] + getitem_2788: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[124] + getitem_2789: "f32[2304][1]cuda:0" = _foreach_div_2[125] + getitem_2790: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[126] + getitem_2791: "f32[768][1]cuda:0" = _foreach_div_2[127] + getitem_2792: "f32[768][1]cuda:0" = _foreach_div_2[128] + getitem_2793: "f32[768][1]cuda:0" = _foreach_div_2[129] + getitem_2794: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[130] + getitem_2795: "f32[3072][1]cuda:0" = _foreach_div_2[131] + getitem_2796: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[132] + getitem_2797: "f32[768][1]cuda:0" = _foreach_div_2[133] + getitem_2798: "f32[768][1]cuda:0" = _foreach_div_2[134] + getitem_2799: "f32[768][1]cuda:0" = _foreach_div_2[135] + getitem_2800: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_2[136] + getitem_2801: "f32[2304][1]cuda:0" = _foreach_div_2[137] + getitem_2802: "f32[768, 768][768, 1]cuda:0" = _foreach_div_2[138] + getitem_2803: "f32[768][1]cuda:0" = _foreach_div_2[139] + getitem_2804: "f32[768][1]cuda:0" = _foreach_div_2[140] + getitem_2805: "f32[768][1]cuda:0" = _foreach_div_2[141] + getitem_2806: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_2[142] + getitem_2807: "f32[3072][1]cuda:0" = _foreach_div_2[143] + getitem_2808: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_2[144] + getitem_2809: "f32[768][1]cuda:0" = _foreach_div_2[145] + getitem_2810: "f32[768][1]cuda:0" = _foreach_div_2[146] + getitem_2811: "f32[768][1]cuda:0" = _foreach_div_2[147]; _foreach_div_2 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:589 in _multi_tensor_adam, code: torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt) + _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, 
getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_444 = getitem_445 = getitem_446 = getitem_447 = getitem_448 = getitem_449 = getitem_450 = getitem_451 = getitem_452 = getitem_453 = getitem_454 = getitem_455 = getitem_456 = getitem_457 = getitem_458 = getitem_459 = getitem_460 = getitem_461 = getitem_462 = 
getitem_463 = getitem_464 = getitem_465 = getitem_466 = getitem_467 = getitem_468 = getitem_469 = getitem_470 = getitem_471 = getitem_472 = getitem_473 = getitem_474 = getitem_475 = getitem_476 = getitem_477 = getitem_478 = getitem_479 = getitem_480 = getitem_481 = getitem_482 = getitem_483 = getitem_484 = getitem_485 = getitem_486 = getitem_487 = getitem_488 = getitem_489 = getitem_490 = getitem_491 = getitem_492 = getitem_493 = getitem_494 = getitem_495 = getitem_496 = getitem_497 = getitem_498 = getitem_499 = getitem_500 = getitem_501 = getitem_502 = getitem_503 = getitem_504 = getitem_505 = getitem_506 = getitem_507 = getitem_508 = getitem_509 = getitem_510 = getitem_511 = getitem_512 = getitem_513 = getitem_514 = getitem_515 = getitem_516 = getitem_517 = getitem_518 = getitem_519 = getitem_520 = getitem_521 = getitem_522 = getitem_523 = getitem_524 = getitem_525 = getitem_526 = getitem_527 = getitem_528 = getitem_529 = getitem_530 = getitem_531 = getitem_532 = getitem_533 = getitem_534 = getitem_535 = getitem_536 = getitem_537 = getitem_538 = getitem_539 = getitem_540 = getitem_541 = getitem_542 = getitem_543 = getitem_544 = getitem_545 = getitem_546 = getitem_547 = getitem_548 = getitem_549 = getitem_550 = getitem_551 = getitem_552 = getitem_553 = getitem_554 = getitem_555 = getitem_556 = getitem_557 = getitem_558 = getitem_559 = getitem_560 = getitem_561 = getitem_562 = getitem_563 = getitem_564 = getitem_565 = getitem_566 = getitem_567 = getitem_568 = getitem_569 = getitem_570 = getitem_571 = getitem_572 = getitem_573 = getitem_574 = getitem_575 = getitem_576 = getitem_577 = getitem_578 = getitem_579 = getitem_580 = getitem_581 = getitem_582 = getitem_583 = getitem_584 = getitem_585 = getitem_586 = getitem_587 = getitem_588 = getitem_589 = getitem_590 = getitem_591 = getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = 
getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None + getitem_2812: "f32[50304, 768][768, 1]cuda:0" = _foreach_div_3[0] + getitem_2813: "f32[1024, 768][768, 1]cuda:0" = _foreach_div_3[1] + getitem_2814: "f32[768][1]cuda:0" = _foreach_div_3[2] + getitem_2815: "f32[768][1]cuda:0" = _foreach_div_3[3] + getitem_2816: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[4] + getitem_2817: "f32[2304][1]cuda:0" = _foreach_div_3[5] + getitem_2818: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[6] + getitem_2819: "f32[768][1]cuda:0" = _foreach_div_3[7] + getitem_2820: "f32[768][1]cuda:0" = _foreach_div_3[8] + getitem_2821: "f32[768][1]cuda:0" = _foreach_div_3[9] + getitem_2822: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[10] + getitem_2823: "f32[3072][1]cuda:0" = _foreach_div_3[11] + getitem_2824: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[12] + getitem_2825: "f32[768][1]cuda:0" = _foreach_div_3[13] + getitem_2826: "f32[768][1]cuda:0" = _foreach_div_3[14] + getitem_2827: "f32[768][1]cuda:0" = _foreach_div_3[15] + getitem_2828: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[16] + getitem_2829: "f32[2304][1]cuda:0" = _foreach_div_3[17] + getitem_2830: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[18] + getitem_2831: "f32[768][1]cuda:0" = _foreach_div_3[19] + getitem_2832: "f32[768][1]cuda:0" = _foreach_div_3[20] + getitem_2833: "f32[768][1]cuda:0" = _foreach_div_3[21] + getitem_2834: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[22] + getitem_2835: "f32[3072][1]cuda:0" = _foreach_div_3[23] + getitem_2836: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[24] + getitem_2837: "f32[768][1]cuda:0" = _foreach_div_3[25] + getitem_2838: "f32[768][1]cuda:0" = _foreach_div_3[26] + getitem_2839: "f32[768][1]cuda:0" = _foreach_div_3[27] + getitem_2840: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[28] + getitem_2841: "f32[2304][1]cuda:0" = _foreach_div_3[29] + getitem_2842: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[30] + getitem_2843: "f32[768][1]cuda:0" = _foreach_div_3[31] + getitem_2844: "f32[768][1]cuda:0" = _foreach_div_3[32] + getitem_2845: "f32[768][1]cuda:0" = _foreach_div_3[33] + getitem_2846: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[34] + getitem_2847: "f32[3072][1]cuda:0" = _foreach_div_3[35] + getitem_2848: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[36] + getitem_2849: "f32[768][1]cuda:0" = _foreach_div_3[37] + getitem_2850: "f32[768][1]cuda:0" = _foreach_div_3[38] + getitem_2851: "f32[768][1]cuda:0" = _foreach_div_3[39] + getitem_2852: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[40] + getitem_2853: "f32[2304][1]cuda:0" = _foreach_div_3[41] + getitem_2854: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[42] + getitem_2855: "f32[768][1]cuda:0" = _foreach_div_3[43] + getitem_2856: "f32[768][1]cuda:0" = _foreach_div_3[44] + getitem_2857: "f32[768][1]cuda:0" = _foreach_div_3[45] + getitem_2858: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[46] + getitem_2859: "f32[3072][1]cuda:0" = _foreach_div_3[47] + getitem_2860: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[48] + getitem_2861: "f32[768][1]cuda:0" = _foreach_div_3[49] + getitem_2862: 
"f32[768][1]cuda:0" = _foreach_div_3[50] + getitem_2863: "f32[768][1]cuda:0" = _foreach_div_3[51] + getitem_2864: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[52] + getitem_2865: "f32[2304][1]cuda:0" = _foreach_div_3[53] + getitem_2866: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[54] + getitem_2867: "f32[768][1]cuda:0" = _foreach_div_3[55] + getitem_2868: "f32[768][1]cuda:0" = _foreach_div_3[56] + getitem_2869: "f32[768][1]cuda:0" = _foreach_div_3[57] + getitem_2870: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[58] + getitem_2871: "f32[3072][1]cuda:0" = _foreach_div_3[59] + getitem_2872: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[60] + getitem_2873: "f32[768][1]cuda:0" = _foreach_div_3[61] + getitem_2874: "f32[768][1]cuda:0" = _foreach_div_3[62] + getitem_2875: "f32[768][1]cuda:0" = _foreach_div_3[63] + getitem_2876: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[64] + getitem_2877: "f32[2304][1]cuda:0" = _foreach_div_3[65] + getitem_2878: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[66] + getitem_2879: "f32[768][1]cuda:0" = _foreach_div_3[67] + getitem_2880: "f32[768][1]cuda:0" = _foreach_div_3[68] + getitem_2881: "f32[768][1]cuda:0" = _foreach_div_3[69] + getitem_2882: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[70] + getitem_2883: "f32[3072][1]cuda:0" = _foreach_div_3[71] + getitem_2884: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[72] + getitem_2885: "f32[768][1]cuda:0" = _foreach_div_3[73] + getitem_2886: "f32[768][1]cuda:0" = _foreach_div_3[74] + getitem_2887: "f32[768][1]cuda:0" = _foreach_div_3[75] + getitem_2888: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[76] + getitem_2889: "f32[2304][1]cuda:0" = _foreach_div_3[77] + getitem_2890: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[78] + getitem_2891: "f32[768][1]cuda:0" = _foreach_div_3[79] + getitem_2892: "f32[768][1]cuda:0" = _foreach_div_3[80] + getitem_2893: "f32[768][1]cuda:0" = _foreach_div_3[81] + getitem_2894: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[82] + getitem_2895: "f32[3072][1]cuda:0" = _foreach_div_3[83] + getitem_2896: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[84] + getitem_2897: "f32[768][1]cuda:0" = _foreach_div_3[85] + getitem_2898: "f32[768][1]cuda:0" = _foreach_div_3[86] + getitem_2899: "f32[768][1]cuda:0" = _foreach_div_3[87] + getitem_2900: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[88] + getitem_2901: "f32[2304][1]cuda:0" = _foreach_div_3[89] + getitem_2902: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[90] + getitem_2903: "f32[768][1]cuda:0" = _foreach_div_3[91] + getitem_2904: "f32[768][1]cuda:0" = _foreach_div_3[92] + getitem_2905: "f32[768][1]cuda:0" = _foreach_div_3[93] + getitem_2906: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[94] + getitem_2907: "f32[3072][1]cuda:0" = _foreach_div_3[95] + getitem_2908: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[96] + getitem_2909: "f32[768][1]cuda:0" = _foreach_div_3[97] + getitem_2910: "f32[768][1]cuda:0" = _foreach_div_3[98] + getitem_2911: "f32[768][1]cuda:0" = _foreach_div_3[99] + getitem_2912: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[100] + getitem_2913: "f32[2304][1]cuda:0" = _foreach_div_3[101] + getitem_2914: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[102] + getitem_2915: "f32[768][1]cuda:0" = _foreach_div_3[103] + getitem_2916: "f32[768][1]cuda:0" = _foreach_div_3[104] + getitem_2917: "f32[768][1]cuda:0" = _foreach_div_3[105] + getitem_2918: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[106] + getitem_2919: "f32[3072][1]cuda:0" = _foreach_div_3[107] + getitem_2920: "f32[768, 3072][3072, 
1]cuda:0" = _foreach_div_3[108] + getitem_2921: "f32[768][1]cuda:0" = _foreach_div_3[109] + getitem_2922: "f32[768][1]cuda:0" = _foreach_div_3[110] + getitem_2923: "f32[768][1]cuda:0" = _foreach_div_3[111] + getitem_2924: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[112] + getitem_2925: "f32[2304][1]cuda:0" = _foreach_div_3[113] + getitem_2926: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[114] + getitem_2927: "f32[768][1]cuda:0" = _foreach_div_3[115] + getitem_2928: "f32[768][1]cuda:0" = _foreach_div_3[116] + getitem_2929: "f32[768][1]cuda:0" = _foreach_div_3[117] + getitem_2930: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[118] + getitem_2931: "f32[3072][1]cuda:0" = _foreach_div_3[119] + getitem_2932: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[120] + getitem_2933: "f32[768][1]cuda:0" = _foreach_div_3[121] + getitem_2934: "f32[768][1]cuda:0" = _foreach_div_3[122] + getitem_2935: "f32[768][1]cuda:0" = _foreach_div_3[123] + getitem_2936: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[124] + getitem_2937: "f32[2304][1]cuda:0" = _foreach_div_3[125] + getitem_2938: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[126] + getitem_2939: "f32[768][1]cuda:0" = _foreach_div_3[127] + getitem_2940: "f32[768][1]cuda:0" = _foreach_div_3[128] + getitem_2941: "f32[768][1]cuda:0" = _foreach_div_3[129] + getitem_2942: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[130] + getitem_2943: "f32[3072][1]cuda:0" = _foreach_div_3[131] + getitem_2944: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[132] + getitem_2945: "f32[768][1]cuda:0" = _foreach_div_3[133] + getitem_2946: "f32[768][1]cuda:0" = _foreach_div_3[134] + getitem_2947: "f32[768][1]cuda:0" = _foreach_div_3[135] + getitem_2948: "f32[2304, 768][768, 1]cuda:0" = _foreach_div_3[136] + getitem_2949: "f32[2304][1]cuda:0" = _foreach_div_3[137] + getitem_2950: "f32[768, 768][768, 1]cuda:0" = _foreach_div_3[138] + getitem_2951: "f32[768][1]cuda:0" = _foreach_div_3[139] + getitem_2952: "f32[768][1]cuda:0" = _foreach_div_3[140] + getitem_2953: "f32[768][1]cuda:0" = _foreach_div_3[141] + getitem_2954: "f32[3072, 768][768, 1]cuda:0" = _foreach_div_3[142] + getitem_2955: "f32[3072][1]cuda:0" = _foreach_div_3[143] + getitem_2956: "f32[768, 3072][3072, 1]cuda:0" = _foreach_div_3[144] + getitem_2957: "f32[768][1]cuda:0" = _foreach_div_3[145] + getitem_2958: "f32[768][1]cuda:0" = _foreach_div_3[146] + getitem_2959: "f32[768][1]cuda:0" = _foreach_div_3[147]; _foreach_div_3 = None + _foreach_add_4 = torch.ops.aten._foreach_add_.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, 
arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); arg0_1 = arg1_1 = arg2_1 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = arg11_1 = arg12_1 = arg13_1 = arg14_1 = arg15_1 = arg16_1 = arg17_1 = arg18_1 = arg19_1 = arg20_1 = arg21_1 = arg22_1 = arg23_1 = arg24_1 = arg25_1 = arg26_1 = arg27_1 = arg28_1 = arg29_1 = arg30_1 = arg31_1 = arg32_1 = arg33_1 = arg34_1 = arg35_1 = arg36_1 = arg37_1 = arg38_1 = arg39_1 = arg40_1 = arg41_1 = arg42_1 = arg43_1 = arg44_1 = arg45_1 = arg46_1 = arg47_1 = arg48_1 = arg49_1 = arg50_1 = arg51_1 = arg52_1 = arg53_1 = arg54_1 = arg55_1 = arg56_1 = arg57_1 = arg58_1 = arg59_1 = arg60_1 = arg61_1 = arg62_1 = arg63_1 = arg64_1 = arg65_1 = arg66_1 = arg67_1 = arg68_1 = arg69_1 = arg70_1 = arg71_1 = arg72_1 = arg73_1 = arg74_1 = arg75_1 = arg76_1 = arg77_1 = arg78_1 = arg79_1 = arg80_1 = arg81_1 = arg82_1 = arg83_1 = arg84_1 = arg85_1 = arg86_1 = arg87_1 = arg88_1 = arg89_1 = arg90_1 = arg91_1 = arg92_1 = arg93_1 = arg94_1 = arg95_1 = arg96_1 = arg97_1 = arg98_1 = arg99_1 = arg100_1 = arg101_1 = arg102_1 = arg103_1 = arg104_1 = arg105_1 = arg106_1 = arg107_1 = arg108_1 = arg109_1 = arg110_1 = arg111_1 = arg112_1 = arg113_1 = 
arg114_1 = arg115_1 = arg116_1 = arg117_1 = arg118_1 = arg119_1 = arg120_1 = arg121_1 = arg122_1 = arg123_1 = arg124_1 = arg125_1 = arg126_1 = arg127_1 = arg128_1 = arg129_1 = arg130_1 = arg131_1 = arg132_1 = arg133_1 = arg134_1 = arg135_1 = arg136_1 = arg137_1 = arg138_1 = arg139_1 = arg140_1 = arg141_1 = arg142_1 = arg143_1 = arg144_1 = arg145_1 = arg146_1 = arg147_1 = getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None + getitem_2960: "f32[50304, 768][768, 1]cuda:0" = _foreach_add_4[0]; getitem_2960 = None + getitem_2961: "f32[1024, 768][768, 1]cuda:0" = _foreach_add_4[1]; getitem_2961 = None + getitem_2962: "f32[768][1]cuda:0" = _foreach_add_4[2]; getitem_2962 = None + getitem_2963: "f32[768][1]cuda:0" = _foreach_add_4[3]; getitem_2963 = None + getitem_2964: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[4]; getitem_2964 = None + getitem_2965: "f32[2304][1]cuda:0" = _foreach_add_4[5]; getitem_2965 = None + getitem_2966: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[6]; getitem_2966 = None + getitem_2967: "f32[768][1]cuda:0" = _foreach_add_4[7]; getitem_2967 = None + getitem_2968: "f32[768][1]cuda:0" = _foreach_add_4[8]; getitem_2968 = None + getitem_2969: "f32[768][1]cuda:0" = _foreach_add_4[9]; getitem_2969 = None + getitem_2970: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[10]; getitem_2970 = None + getitem_2971: "f32[3072][1]cuda:0" = 
_foreach_add_4[11]; getitem_2971 = None + getitem_2972: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[12]; getitem_2972 = None + getitem_2973: "f32[768][1]cuda:0" = _foreach_add_4[13]; getitem_2973 = None + getitem_2974: "f32[768][1]cuda:0" = _foreach_add_4[14]; getitem_2974 = None + getitem_2975: "f32[768][1]cuda:0" = _foreach_add_4[15]; getitem_2975 = None + getitem_2976: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[16]; getitem_2976 = None + getitem_2977: "f32[2304][1]cuda:0" = _foreach_add_4[17]; getitem_2977 = None + getitem_2978: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[18]; getitem_2978 = None + getitem_2979: "f32[768][1]cuda:0" = _foreach_add_4[19]; getitem_2979 = None + getitem_2980: "f32[768][1]cuda:0" = _foreach_add_4[20]; getitem_2980 = None + getitem_2981: "f32[768][1]cuda:0" = _foreach_add_4[21]; getitem_2981 = None + getitem_2982: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[22]; getitem_2982 = None + getitem_2983: "f32[3072][1]cuda:0" = _foreach_add_4[23]; getitem_2983 = None + getitem_2984: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[24]; getitem_2984 = None + getitem_2985: "f32[768][1]cuda:0" = _foreach_add_4[25]; getitem_2985 = None + getitem_2986: "f32[768][1]cuda:0" = _foreach_add_4[26]; getitem_2986 = None + getitem_2987: "f32[768][1]cuda:0" = _foreach_add_4[27]; getitem_2987 = None + getitem_2988: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[28]; getitem_2988 = None + getitem_2989: "f32[2304][1]cuda:0" = _foreach_add_4[29]; getitem_2989 = None + getitem_2990: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[30]; getitem_2990 = None + getitem_2991: "f32[768][1]cuda:0" = _foreach_add_4[31]; getitem_2991 = None + getitem_2992: "f32[768][1]cuda:0" = _foreach_add_4[32]; getitem_2992 = None + getitem_2993: "f32[768][1]cuda:0" = _foreach_add_4[33]; getitem_2993 = None + getitem_2994: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[34]; getitem_2994 = None + getitem_2995: "f32[3072][1]cuda:0" = _foreach_add_4[35]; getitem_2995 = None + getitem_2996: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[36]; getitem_2996 = None + getitem_2997: "f32[768][1]cuda:0" = _foreach_add_4[37]; getitem_2997 = None + getitem_2998: "f32[768][1]cuda:0" = _foreach_add_4[38]; getitem_2998 = None + getitem_2999: "f32[768][1]cuda:0" = _foreach_add_4[39]; getitem_2999 = None + getitem_3000: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[40]; getitem_3000 = None + getitem_3001: "f32[2304][1]cuda:0" = _foreach_add_4[41]; getitem_3001 = None + getitem_3002: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[42]; getitem_3002 = None + getitem_3003: "f32[768][1]cuda:0" = _foreach_add_4[43]; getitem_3003 = None + getitem_3004: "f32[768][1]cuda:0" = _foreach_add_4[44]; getitem_3004 = None + getitem_3005: "f32[768][1]cuda:0" = _foreach_add_4[45]; getitem_3005 = None + getitem_3006: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[46]; getitem_3006 = None + getitem_3007: "f32[3072][1]cuda:0" = _foreach_add_4[47]; getitem_3007 = None + getitem_3008: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[48]; getitem_3008 = None + getitem_3009: "f32[768][1]cuda:0" = _foreach_add_4[49]; getitem_3009 = None + getitem_3010: "f32[768][1]cuda:0" = _foreach_add_4[50]; getitem_3010 = None + getitem_3011: "f32[768][1]cuda:0" = _foreach_add_4[51]; getitem_3011 = None + getitem_3012: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[52]; getitem_3012 = None + getitem_3013: "f32[2304][1]cuda:0" = _foreach_add_4[53]; getitem_3013 = None + getitem_3014: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[54]; 
getitem_3014 = None + getitem_3015: "f32[768][1]cuda:0" = _foreach_add_4[55]; getitem_3015 = None + getitem_3016: "f32[768][1]cuda:0" = _foreach_add_4[56]; getitem_3016 = None + getitem_3017: "f32[768][1]cuda:0" = _foreach_add_4[57]; getitem_3017 = None + getitem_3018: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[58]; getitem_3018 = None + getitem_3019: "f32[3072][1]cuda:0" = _foreach_add_4[59]; getitem_3019 = None + getitem_3020: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[60]; getitem_3020 = None + getitem_3021: "f32[768][1]cuda:0" = _foreach_add_4[61]; getitem_3021 = None + getitem_3022: "f32[768][1]cuda:0" = _foreach_add_4[62]; getitem_3022 = None + getitem_3023: "f32[768][1]cuda:0" = _foreach_add_4[63]; getitem_3023 = None + getitem_3024: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[64]; getitem_3024 = None + getitem_3025: "f32[2304][1]cuda:0" = _foreach_add_4[65]; getitem_3025 = None + getitem_3026: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[66]; getitem_3026 = None + getitem_3027: "f32[768][1]cuda:0" = _foreach_add_4[67]; getitem_3027 = None + getitem_3028: "f32[768][1]cuda:0" = _foreach_add_4[68]; getitem_3028 = None + getitem_3029: "f32[768][1]cuda:0" = _foreach_add_4[69]; getitem_3029 = None + getitem_3030: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[70]; getitem_3030 = None + getitem_3031: "f32[3072][1]cuda:0" = _foreach_add_4[71]; getitem_3031 = None + getitem_3032: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[72]; getitem_3032 = None + getitem_3033: "f32[768][1]cuda:0" = _foreach_add_4[73]; getitem_3033 = None + getitem_3034: "f32[768][1]cuda:0" = _foreach_add_4[74]; getitem_3034 = None + getitem_3035: "f32[768][1]cuda:0" = _foreach_add_4[75]; getitem_3035 = None + getitem_3036: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[76]; getitem_3036 = None + getitem_3037: "f32[2304][1]cuda:0" = _foreach_add_4[77]; getitem_3037 = None + getitem_3038: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[78]; getitem_3038 = None + getitem_3039: "f32[768][1]cuda:0" = _foreach_add_4[79]; getitem_3039 = None + getitem_3040: "f32[768][1]cuda:0" = _foreach_add_4[80]; getitem_3040 = None + getitem_3041: "f32[768][1]cuda:0" = _foreach_add_4[81]; getitem_3041 = None + getitem_3042: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[82]; getitem_3042 = None + getitem_3043: "f32[3072][1]cuda:0" = _foreach_add_4[83]; getitem_3043 = None + getitem_3044: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[84]; getitem_3044 = None + getitem_3045: "f32[768][1]cuda:0" = _foreach_add_4[85]; getitem_3045 = None + getitem_3046: "f32[768][1]cuda:0" = _foreach_add_4[86]; getitem_3046 = None + getitem_3047: "f32[768][1]cuda:0" = _foreach_add_4[87]; getitem_3047 = None + getitem_3048: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[88]; getitem_3048 = None + getitem_3049: "f32[2304][1]cuda:0" = _foreach_add_4[89]; getitem_3049 = None + getitem_3050: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[90]; getitem_3050 = None + getitem_3051: "f32[768][1]cuda:0" = _foreach_add_4[91]; getitem_3051 = None + getitem_3052: "f32[768][1]cuda:0" = _foreach_add_4[92]; getitem_3052 = None + getitem_3053: "f32[768][1]cuda:0" = _foreach_add_4[93]; getitem_3053 = None + getitem_3054: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[94]; getitem_3054 = None + getitem_3055: "f32[3072][1]cuda:0" = _foreach_add_4[95]; getitem_3055 = None + getitem_3056: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[96]; getitem_3056 = None + getitem_3057: "f32[768][1]cuda:0" = _foreach_add_4[97]; getitem_3057 = None + getitem_3058: 
"f32[768][1]cuda:0" = _foreach_add_4[98]; getitem_3058 = None + getitem_3059: "f32[768][1]cuda:0" = _foreach_add_4[99]; getitem_3059 = None + getitem_3060: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[100]; getitem_3060 = None + getitem_3061: "f32[2304][1]cuda:0" = _foreach_add_4[101]; getitem_3061 = None + getitem_3062: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[102]; getitem_3062 = None + getitem_3063: "f32[768][1]cuda:0" = _foreach_add_4[103]; getitem_3063 = None + getitem_3064: "f32[768][1]cuda:0" = _foreach_add_4[104]; getitem_3064 = None + getitem_3065: "f32[768][1]cuda:0" = _foreach_add_4[105]; getitem_3065 = None + getitem_3066: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[106]; getitem_3066 = None + getitem_3067: "f32[3072][1]cuda:0" = _foreach_add_4[107]; getitem_3067 = None + getitem_3068: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[108]; getitem_3068 = None + getitem_3069: "f32[768][1]cuda:0" = _foreach_add_4[109]; getitem_3069 = None + getitem_3070: "f32[768][1]cuda:0" = _foreach_add_4[110]; getitem_3070 = None + getitem_3071: "f32[768][1]cuda:0" = _foreach_add_4[111]; getitem_3071 = None + getitem_3072: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[112]; getitem_3072 = None + getitem_3073: "f32[2304][1]cuda:0" = _foreach_add_4[113]; getitem_3073 = None + getitem_3074: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[114]; getitem_3074 = None + getitem_3075: "f32[768][1]cuda:0" = _foreach_add_4[115]; getitem_3075 = None + getitem_3076: "f32[768][1]cuda:0" = _foreach_add_4[116]; getitem_3076 = None + getitem_3077: "f32[768][1]cuda:0" = _foreach_add_4[117]; getitem_3077 = None + getitem_3078: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[118]; getitem_3078 = None + getitem_3079: "f32[3072][1]cuda:0" = _foreach_add_4[119]; getitem_3079 = None + getitem_3080: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[120]; getitem_3080 = None + getitem_3081: "f32[768][1]cuda:0" = _foreach_add_4[121]; getitem_3081 = None + getitem_3082: "f32[768][1]cuda:0" = _foreach_add_4[122]; getitem_3082 = None + getitem_3083: "f32[768][1]cuda:0" = _foreach_add_4[123]; getitem_3083 = None + getitem_3084: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[124]; getitem_3084 = None + getitem_3085: "f32[2304][1]cuda:0" = _foreach_add_4[125]; getitem_3085 = None + getitem_3086: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[126]; getitem_3086 = None + getitem_3087: "f32[768][1]cuda:0" = _foreach_add_4[127]; getitem_3087 = None + getitem_3088: "f32[768][1]cuda:0" = _foreach_add_4[128]; getitem_3088 = None + getitem_3089: "f32[768][1]cuda:0" = _foreach_add_4[129]; getitem_3089 = None + getitem_3090: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[130]; getitem_3090 = None + getitem_3091: "f32[3072][1]cuda:0" = _foreach_add_4[131]; getitem_3091 = None + getitem_3092: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[132]; getitem_3092 = None + getitem_3093: "f32[768][1]cuda:0" = _foreach_add_4[133]; getitem_3093 = None + getitem_3094: "f32[768][1]cuda:0" = _foreach_add_4[134]; getitem_3094 = None + getitem_3095: "f32[768][1]cuda:0" = _foreach_add_4[135]; getitem_3095 = None + getitem_3096: "f32[2304, 768][768, 1]cuda:0" = _foreach_add_4[136]; getitem_3096 = None + getitem_3097: "f32[2304][1]cuda:0" = _foreach_add_4[137]; getitem_3097 = None + getitem_3098: "f32[768, 768][768, 1]cuda:0" = _foreach_add_4[138]; getitem_3098 = None + getitem_3099: "f32[768][1]cuda:0" = _foreach_add_4[139]; getitem_3099 = None + getitem_3100: "f32[768][1]cuda:0" = _foreach_add_4[140]; getitem_3100 = None + getitem_3101: 
"f32[768][1]cuda:0" = _foreach_add_4[141]; getitem_3101 = None + getitem_3102: "f32[3072, 768][768, 1]cuda:0" = _foreach_add_4[142]; getitem_3102 = None + getitem_3103: "f32[3072][1]cuda:0" = _foreach_add_4[143]; getitem_3103 = None + getitem_3104: "f32[768, 3072][3072, 1]cuda:0" = _foreach_add_4[144]; getitem_3104 = None + getitem_3105: "f32[768][1]cuda:0" = _foreach_add_4[145]; getitem_3105 = None + getitem_3106: "f32[768][1]cuda:0" = _foreach_add_4[146]; getitem_3106 = None + getitem_3107: "f32[768][1]cuda:0" = _foreach_add_4[147]; _foreach_add_4 = getitem_3107 = None + + # File: /data/users/jjwu/a/pytorch/torch/optim/adam.py:544 in _multi_tensor_adam, code: torch._foreach_addcmul_( + copy__150: "f32[1024, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None + copy__298: "f32[50304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None + copy__299: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None + copy__300: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None + copy__301: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None + copy__302: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None + copy__303: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None + copy__304: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None + copy__305: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None + copy__306: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None + copy__307: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None + copy__308: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None + copy__309: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None + copy__310: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None + copy__311: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None + copy__312: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None + copy__313: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None + copy__314: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None + copy__315: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None + copy__316: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None + copy__317: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None + copy__318: "f32[768][1]cuda:0" = 
torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None + copy__319: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None + copy__320: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None + copy__321: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None + copy__322: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None + copy__323: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None + copy__324: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None + copy__325: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None + copy__326: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None + copy__327: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None + copy__328: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None + copy__329: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None + copy__330: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None + copy__331: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None + copy__332: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None + copy__333: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None + copy__334: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None + copy__335: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None + copy__336: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None + copy__337: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None + copy__338: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None + copy__339: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None + copy__340: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None + copy__341: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None + copy__342: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None + copy__343: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None + copy__344: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = 
getitem_935 = copy__344 = None + copy__345: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None + copy__346: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None + copy__347: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None + copy__348: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None + copy__349: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None + copy__350: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None + copy__351: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None + copy__352: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None + copy__353: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None + copy__354: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None + copy__355: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None + copy__356: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None + copy__357: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None + copy__358: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None + copy__359: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None + copy__360: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None + copy__361: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None + copy__362: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None + copy__363: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None + copy__364: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None + copy__365: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None + copy__366: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None + copy__367: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None + copy__368: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None + copy__369: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None + copy__370: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None + copy__371: "f32[768][1]cuda:0" = 
torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None
+ copy__372: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None
+ copy__373: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None
+ copy__374: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None
+ copy__375: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None
+ copy__376: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None
+ copy__377: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None
+ copy__378: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None
+ copy__379: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None
+ copy__380: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None
+ copy__381: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None
+ copy__382: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None
+ copy__383: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None
+ copy__384: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None
+ copy__385: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None
+ copy__386: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None
+ copy__387: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None
+ copy__388: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None
+ copy__389: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None
+ copy__390: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None
+ copy__391: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None
+ copy__392: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None
+ copy__393: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None
+ copy__394: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None
+ copy__395: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None
+ copy__396: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None
+ copy__397: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None
+ copy__398: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None
+ copy__399: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None
+ copy__400: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None
+ copy__401: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None
+ copy__402: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None
+ copy__403: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None
+ copy__404: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None
+ copy__405: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None
+ copy__406: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None
+ copy__407: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None
+ copy__408: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None
+ copy__409: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None
+ copy__410: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None
+ copy__411: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None
+ copy__412: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None
+ copy__413: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None
+ copy__414: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None
+ copy__415: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None
+ copy__416: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None
+ copy__417: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None
+ copy__418: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None
+ copy__419: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None
+ copy__420: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None
+ copy__421: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None
+ copy__422: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None
+ copy__423: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None
+ copy__424: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None
+ copy__425: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None
+ copy__426: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None
+ copy__427: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None
+ copy__428: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None
+ copy__429: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None
+ copy__430: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None
+ copy__431: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None
+ copy__432: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None
+ copy__433: "f32[2304, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None
+ copy__434: "f32[2304][1]cuda:0" = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None
+ copy__435: "f32[768, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None
+ copy__436: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None
+ copy__437: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None
+ copy__438: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None
+ copy__439: "f32[3072, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None
+ copy__440: "f32[3072][1]cuda:0" = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None
+ copy__441: "f32[768, 3072][3072, 1]cuda:0" = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None
+ copy__442: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None
+ copy__443: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None
+ copy__444: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None
+ return ()
+
+V0806 13:56:11.480000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4e04a19e840301102c3e8b2b088b3abf"}
+ {
+     "name": "GraphLowering.run",
+     "ts": 1722977771480079.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.522000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9f758bbcb2d7a7a4a46da10659086286"}
+ {
+     "name": "GraphLowering.run",
+     "ts": 1722977773522011.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:13.523000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2fa9ac69896097e4eb4ed2ad86d20e4f"}
+ {
+     "name": "GraphLowering.compile_to_module",
+     "ts": 1722977773523916.2,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.524000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "47abc55786f5a7bcd127c7ef1ae29b62"}
+ {
+     "name": "code_gen",
+     "ts": 1722977773524017.8,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:13.543000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "67d3bdf78f3e6ff33c008448c7b9f286"}
+ {
+     "name": "Scheduler.__init__",
+     "ts": 1722977773542979.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:17.133000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "2a4699e4806de3efa88d475f85b1d1cb"}
+ {
+     "name": "Scheduler.__init__",
+     "ts": 1722977777133789.0,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:17.134000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "c4dccb40c589272602b6ba5f0a9c3016"}
+ {
+     "name": "Scheduler.codegen",
+     "ts": 1722977777134222.5,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:18.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cdce945b72aad78fb46a9c332c97cf2e"}
+ {
+     "name": "Scheduler.codegen",
+     "ts": 1722977778887957.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:18.888000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "1c72e346bbb451706a43eaeadd1da1d2"}
+ {
+     "name": "WrapperCodeGen.generate",
+     "ts": 1722977778888304.8,
+     "args": null,
+     "ph": "B",
+     "pid": 0
+ }
+V0806 13:56:18.910000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "cc5032c0230219adb044a52018b70d03"}
+ {
+     "name": "WrapperCodeGen.generate",
+     "ts": 1722977778910246.5,
+     "args": null,
+     "ph": "E",
+     "pid": 0
+ }
+V0806 13:56:18.912000 4107173 torch/_inductor/graph.py:1792] {"inductor_output_code": {"filename": "/tmp/tmp2ln889l5/yo/cyopfy25nuerjsxpoyw3h27mzwbtrvws3a3ylbzpnbpdmyrquagq.py"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "73fb67ed2c75d446b0061a6d9bb8d8a9"}
+
+ # AOT ID: ['1_inference']
+ from ctypes import c_void_p, c_long
+ import torch
+ import math
+ import random
+ import os
+ import tempfile
+ from math import inf, nan
+ from torch._inductor.hooks import run_intermediate_hooks
+ from torch._inductor.utils import maybe_profile
+ from torch._inductor.codegen.memory_planning import _align as align
+
+ from torch import device, empty_strided
+ from torch._inductor.async_compile import AsyncCompile
+ from torch._inductor.select_algorithm import extern_kernels
+ from torch._inductor.codegen.multi_kernel import MultiKernelCall
+
+ aten = torch.ops.aten
+ inductor_ops = torch.ops.inductor
+ _quantized = torch.ops._quantized
+ assert_size_stride = torch._C._dynamo.guards.assert_size_stride
+ empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
+ empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
+ reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
+ alloc_from_pool = torch.ops.inductor._alloc_from_pool
+ async_compile = AsyncCompile()
+
+
+ # kernel path: /tmp/tmp2ln889l5/qe/cqegvx7pet6zxtze7xi2d3ife3fsc73adn4fimsnammc2efrmybh.py
+ # Source Nodes: [], Original ATen: []
+
+ triton_for_fused_0 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.foreach(
+     num_warps=8,
+     triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32', 152: '*fp32', 153: '*fp32', 154: '*fp32', 155: '*fp32', 156: '*fp32', 157: '*fp32', 158: '*fp32', 159: '*fp32', 160: '*fp32', 161: '*fp32', 162: '*fp32', 163: '*fp32', 164: '*fp32', 165: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165), equal_to_1=())]},
+     inductor_meta={'kernel_name': 'triton_for_fused_0', 'mutated_arg_names': ['in_ptr0', 'in_ptr1', 'in_ptr10', 'in_ptr11', 'in_ptr12', 'in_ptr13', 'in_ptr14', 'in_ptr15', 'in_ptr16', 'in_ptr17', 'in_ptr18', 'in_ptr19', 'in_ptr2', 'in_ptr20', 'in_ptr21', 'in_ptr22', 'in_ptr23', 'in_ptr24', 'in_ptr25', 'in_ptr26', 'in_ptr27', 'in_ptr28', 'in_ptr29', 'in_ptr3', 'in_ptr30', 'in_ptr31', 'in_ptr32', 'in_ptr33', 'in_ptr34', 'in_ptr35', 'in_ptr36', 'in_ptr37', 'in_ptr38', 'in_ptr39', 'in_ptr4', 'in_ptr40', 'in_ptr41', 'in_ptr42', 'in_ptr43', 'in_ptr44', 'in_ptr45', 'in_ptr46', 'in_ptr47', 'in_ptr48', 'in_ptr49', 'in_ptr5', 'in_ptr50', 'in_ptr51', 'in_ptr52', 'in_ptr53', 'in_ptr54', 'in_ptr55', 'in_ptr56', 'in_ptr57', 'in_ptr58', 'in_ptr59', 'in_ptr6', 'in_ptr60', 'in_ptr61', 'in_ptr62', 'in_ptr63', 'in_ptr64', 'in_ptr65', 'in_ptr66', 'in_ptr67', 'in_ptr68', 'in_ptr69', 'in_ptr7', 'in_ptr70', 'in_ptr71', 'in_ptr72', 'in_ptr73', 'in_ptr74', 'in_ptr75', 'in_ptr76', 'in_ptr77', 'in_ptr78', 'in_ptr79', 'in_ptr8', 'in_ptr80', 'in_ptr81', 'in_ptr82', 'in_ptr9', 'out_ptr0', 'out_ptr1', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr13', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr17', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr21', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr25', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr29', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr33', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr37', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr41', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr45', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr49', 'out_ptr5', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr53', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr57', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr61', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr65', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr69', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr73', 'out_ptr74', 'out_ptr75', 'out_ptr76', 'out_ptr77', 'out_ptr78', 'out_ptr79', 'out_ptr8', 'out_ptr80', 'out_ptr81', 'out_ptr82', 'out_ptr9'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+ )
+ @triton.jit
+ def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, out_ptr0, out_ptr1, out_ptr2, out_ptr3, out_ptr4, out_ptr5, out_ptr6, out_ptr7, out_ptr8, out_ptr9, out_ptr10, out_ptr11, out_ptr12, out_ptr13, out_ptr14, out_ptr15, out_ptr16, out_ptr17, out_ptr18, out_ptr19, out_ptr20, out_ptr21, out_ptr22, out_ptr23, out_ptr24, out_ptr25, out_ptr26, out_ptr27, out_ptr28, out_ptr29, out_ptr30, out_ptr31, out_ptr32, out_ptr33, out_ptr34, out_ptr35, out_ptr36, out_ptr37, out_ptr38, out_ptr39, out_ptr40, out_ptr41, out_ptr42, out_ptr43, out_ptr44, out_ptr45, out_ptr46, out_ptr47, out_ptr48, out_ptr49, out_ptr50, out_ptr51, out_ptr52, out_ptr53, out_ptr54, out_ptr55, out_ptr56, out_ptr57, out_ptr58, out_ptr59, out_ptr60, out_ptr61, out_ptr62, out_ptr63, out_ptr64, out_ptr65, out_ptr66, out_ptr67, out_ptr68, out_ptr69, out_ptr70, out_ptr71, out_ptr72, out_ptr73, out_ptr74, out_ptr75, out_ptr76, out_ptr77, out_ptr78, out_ptr79, out_ptr80, out_ptr81, out_ptr82):
+     pid = tl.program_id(0)
+     XBLOCK: tl.constexpr = 1024
+     num_xblocks_0 = tl.cdiv(1, XBLOCK)
+     num_xblocks_1 = num_xblocks_0 + tl.cdiv(1, XBLOCK)
+     num_xblocks_2 = num_xblocks_1 + tl.cdiv(1, XBLOCK)
+     num_xblocks_3 = num_xblocks_2 + tl.cdiv(1, XBLOCK)
+     num_xblocks_4 = num_xblocks_3 + tl.cdiv(1, XBLOCK)
+     num_xblocks_5 = num_xblocks_4 + tl.cdiv(1, XBLOCK)
+     num_xblocks_6 = num_xblocks_5 + tl.cdiv(1, XBLOCK)
+     num_xblocks_7 = num_xblocks_6 + tl.cdiv(1, XBLOCK)
+     num_xblocks_8 = num_xblocks_7 + tl.cdiv(1, XBLOCK)
+     num_xblocks_9 = num_xblocks_8 + tl.cdiv(1, XBLOCK)
+     num_xblocks_10 = num_xblocks_9 + tl.cdiv(1, XBLOCK)
+     num_xblocks_11 = num_xblocks_10 + tl.cdiv(1, XBLOCK)
+     num_xblocks_12 = num_xblocks_11 + tl.cdiv(1, XBLOCK)
+     num_xblocks_13 = num_xblocks_12 + tl.cdiv(1, XBLOCK)
+     num_xblocks_14 = num_xblocks_13 + tl.cdiv(1, XBLOCK)
+     num_xblocks_15 = num_xblocks_14 + tl.cdiv(1, XBLOCK)
+     num_xblocks_16 = num_xblocks_15 + tl.cdiv(1, XBLOCK)
+     num_xblocks_17 = num_xblocks_16 + tl.cdiv(1, XBLOCK)
+     num_xblocks_18 = num_xblocks_17 + tl.cdiv(1, XBLOCK)
+     num_xblocks_19 = num_xblocks_18 + tl.cdiv(1, XBLOCK)
+     num_xblocks_20 = num_xblocks_19 + tl.cdiv(1, XBLOCK)
+     num_xblocks_21 = num_xblocks_20 + tl.cdiv(1, XBLOCK)
+     num_xblocks_22 = num_xblocks_21 + tl.cdiv(1, XBLOCK)
+     num_xblocks_23 = num_xblocks_22 + tl.cdiv(1, XBLOCK)
+     num_xblocks_24 = num_xblocks_23 + tl.cdiv(1, XBLOCK)
+     num_xblocks_25 = num_xblocks_24 + tl.cdiv(1, XBLOCK)
+     num_xblocks_26 = num_xblocks_25 + tl.cdiv(1, XBLOCK)
+     num_xblocks_27 = num_xblocks_26 + tl.cdiv(1, XBLOCK)
+     num_xblocks_28 = num_xblocks_27 + tl.cdiv(1, XBLOCK)
+     num_xblocks_29 = num_xblocks_28 + tl.cdiv(1, XBLOCK)
+     num_xblocks_30 = num_xblocks_29 + tl.cdiv(1, XBLOCK)
+     num_xblocks_31 = num_xblocks_30 + tl.cdiv(1, XBLOCK)
+     num_xblocks_32 = num_xblocks_31 + tl.cdiv(1, XBLOCK)
+     num_xblocks_33 = num_xblocks_32 + tl.cdiv(1, XBLOCK)
+     num_xblocks_34 = num_xblocks_33 + tl.cdiv(1, XBLOCK)
+     num_xblocks_35 = num_xblocks_34 + tl.cdiv(1, XBLOCK)
+     num_xblocks_36 = num_xblocks_35 + tl.cdiv(1, XBLOCK)
+     num_xblocks_37 = num_xblocks_36 + tl.cdiv(1, XBLOCK)
+     num_xblocks_38 = num_xblocks_37 + tl.cdiv(1, XBLOCK)
+     num_xblocks_39 = num_xblocks_38 + tl.cdiv(1, XBLOCK)
+     num_xblocks_40 = num_xblocks_39 + tl.cdiv(1, XBLOCK)
+     num_xblocks_41 = num_xblocks_40 + tl.cdiv(1, XBLOCK)
+     num_xblocks_42 = num_xblocks_41 + tl.cdiv(1, XBLOCK)
+     num_xblocks_43 = num_xblocks_42 + tl.cdiv(1, XBLOCK)
+     num_xblocks_44 = num_xblocks_43 + tl.cdiv(1, XBLOCK)
+     num_xblocks_45 = num_xblocks_44 + tl.cdiv(1, XBLOCK)
+     num_xblocks_46 = num_xblocks_45 + tl.cdiv(1, XBLOCK)
+     num_xblocks_47 = num_xblocks_46 + tl.cdiv(1, XBLOCK)
+     num_xblocks_48 = num_xblocks_47 + tl.cdiv(1, XBLOCK)
+     num_xblocks_49 = num_xblocks_48 + tl.cdiv(1, XBLOCK)
+     num_xblocks_50 = num_xblocks_49 + tl.cdiv(1, XBLOCK)
+     num_xblocks_51 = num_xblocks_50 + tl.cdiv(1, XBLOCK)
+     num_xblocks_52 = num_xblocks_51 + tl.cdiv(1, XBLOCK)
+     num_xblocks_53 = num_xblocks_52 + tl.cdiv(1, XBLOCK)
+     num_xblocks_54 = num_xblocks_53 + tl.cdiv(1, XBLOCK)
+     num_xblocks_55 = num_xblocks_54 + tl.cdiv(1, XBLOCK)
+     num_xblocks_56 = num_xblocks_55 + tl.cdiv(1, XBLOCK)
+     num_xblocks_57 = num_xblocks_56 + tl.cdiv(1, XBLOCK)
+     num_xblocks_58 = num_xblocks_57 + tl.cdiv(1, XBLOCK)
+     num_xblocks_59 = num_xblocks_58 + tl.cdiv(1, XBLOCK)
+     num_xblocks_60 = num_xblocks_59 + tl.cdiv(1, XBLOCK)
+     num_xblocks_61 = num_xblocks_60 + tl.cdiv(1, XBLOCK)
+     num_xblocks_62 = num_xblocks_61 + tl.cdiv(1, XBLOCK)
+     num_xblocks_63 = num_xblocks_62 + tl.cdiv(1, XBLOCK)
+     num_xblocks_64 = num_xblocks_63 + tl.cdiv(1, XBLOCK)
+     num_xblocks_65 = num_xblocks_64 + tl.cdiv(1, XBLOCK)
+     num_xblocks_66 = num_xblocks_65 + tl.cdiv(1, XBLOCK)
+     num_xblocks_67 = num_xblocks_66 + tl.cdiv(1, XBLOCK)
+     num_xblocks_68 = num_xblocks_67 + tl.cdiv(1, XBLOCK)
+     num_xblocks_69 = num_xblocks_68 + tl.cdiv(1, XBLOCK)
+     num_xblocks_70 = num_xblocks_69 + tl.cdiv(1, XBLOCK)
+     num_xblocks_71 = num_xblocks_70 + tl.cdiv(1, XBLOCK)
+     num_xblocks_72 = num_xblocks_71 + tl.cdiv(1, XBLOCK)
+     num_xblocks_73 = num_xblocks_72 + tl.cdiv(1, XBLOCK)
+     num_xblocks_74 = num_xblocks_73 + tl.cdiv(1, XBLOCK)
+     num_xblocks_75 = num_xblocks_74 + tl.cdiv(1, XBLOCK)
+     num_xblocks_76 = num_xblocks_75 + tl.cdiv(1, XBLOCK)
+     num_xblocks_77 = num_xblocks_76 + tl.cdiv(1, XBLOCK)
+     num_xblocks_78 = num_xblocks_77 + tl.cdiv(1, XBLOCK)
+     num_xblocks_79 = num_xblocks_78 + tl.cdiv(1, XBLOCK)
+     num_xblocks_80 = num_xblocks_79 + tl.cdiv(1, XBLOCK)
+     num_xblocks_81 = num_xblocks_80 + tl.cdiv(1, XBLOCK)
+     num_xblocks_82 = num_xblocks_81 + tl.cdiv(1, XBLOCK)
+     if pid < num_xblocks_0:
+         pid_offset = pid
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp0 = tl.load(in_ptr0 + (0))
+         tmp1 = tl.broadcast_to(tmp0, [XBLOCK])
+         tmp2 = 1.0
+         tmp3 = tmp1 + tmp2
+         tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp3, None)
+     elif pid < num_xblocks_1:
+         pid_offset = pid - num_xblocks_0
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp4 = tl.load(in_ptr1 + (0))
+         tmp5 = tl.broadcast_to(tmp4, [XBLOCK])
+         tmp6 = 1.0
+         tmp7 = tmp5 + tmp6
+         tl.store(out_ptr1 + (tl.full([XBLOCK], 0, tl.int32)), tmp7, None)
+     elif pid < num_xblocks_2:
+         pid_offset = pid - num_xblocks_1
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp8 = tl.load(in_ptr2 + (0))
+         tmp9 = tl.broadcast_to(tmp8, [XBLOCK])
+         tmp10 = 1.0
+         tmp11 = tmp9 + tmp10
+         tl.store(out_ptr2 + (tl.full([XBLOCK], 0, tl.int32)), tmp11, None)
+     elif pid < num_xblocks_3:
+         pid_offset = pid - num_xblocks_2
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp12 = tl.load(in_ptr3 + (0))
+         tmp13 = tl.broadcast_to(tmp12, [XBLOCK])
+         tmp14 = 1.0
+         tmp15 = tmp13 + tmp14
+         tl.store(out_ptr3 + (tl.full([XBLOCK], 0, tl.int32)), tmp15, None)
+     elif pid < num_xblocks_4:
+         pid_offset = pid - num_xblocks_3
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp16 = tl.load(in_ptr4 + (0))
+         tmp17 = tl.broadcast_to(tmp16, [XBLOCK])
+         tmp18 = 1.0
+         tmp19 = tmp17 + tmp18
+         tl.store(out_ptr4 + (tl.full([XBLOCK], 0, tl.int32)), tmp19, None)
+     elif pid < num_xblocks_5:
+         pid_offset = pid - num_xblocks_4
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp20 = tl.load(in_ptr5 + (0))
+         tmp21 = tl.broadcast_to(tmp20, [XBLOCK])
+         tmp22 = 1.0
+         tmp23 = tmp21 + tmp22
+         tl.store(out_ptr5 + (tl.full([XBLOCK], 0, tl.int32)), tmp23, None)
+     elif pid < num_xblocks_6:
+         pid_offset = pid - num_xblocks_5
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp24 = tl.load(in_ptr6 + (0))
+         tmp25 = tl.broadcast_to(tmp24, [XBLOCK])
+         tmp26 = 1.0
+         tmp27 = tmp25 + tmp26
+         tl.store(out_ptr6 + (tl.full([XBLOCK], 0, tl.int32)), tmp27, None)
+     elif pid < num_xblocks_7:
+         pid_offset = pid - num_xblocks_6
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp28 = tl.load(in_ptr7 + (0))
+         tmp29 = tl.broadcast_to(tmp28, [XBLOCK])
+         tmp30 = 1.0
+         tmp31 = tmp29 + tmp30
+         tl.store(out_ptr7 + (tl.full([XBLOCK], 0, tl.int32)), tmp31, None)
+     elif pid < num_xblocks_8:
+         pid_offset = pid - num_xblocks_7
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp32 = tl.load(in_ptr8 + (0))
+         tmp33 = tl.broadcast_to(tmp32, [XBLOCK])
+         tmp34 = 1.0
+         tmp35 = tmp33 + tmp34
+         tl.store(out_ptr8 + (tl.full([XBLOCK], 0, tl.int32)), tmp35, None)
+     elif pid < num_xblocks_9:
+         pid_offset = pid - num_xblocks_8
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp36 = tl.load(in_ptr9 + (0))
+         tmp37 = tl.broadcast_to(tmp36, [XBLOCK])
+         tmp38 = 1.0
+         tmp39 = tmp37 + tmp38
+         tl.store(out_ptr9 + (tl.full([XBLOCK], 0, tl.int32)), tmp39, None)
+     elif pid < num_xblocks_10:
+         pid_offset = pid - num_xblocks_9
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp40 = tl.load(in_ptr10 + (0))
+         tmp41 = tl.broadcast_to(tmp40, [XBLOCK])
+         tmp42 = 1.0
+         tmp43 = tmp41 + tmp42
+         tl.store(out_ptr10 + (tl.full([XBLOCK], 0, tl.int32)), tmp43, None)
+     elif pid < num_xblocks_11:
+         pid_offset = pid - num_xblocks_10
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp44 = tl.load(in_ptr11 + (0))
+         tmp45 = tl.broadcast_to(tmp44, [XBLOCK])
+         tmp46 = 1.0
+         tmp47 = tmp45 + tmp46
+         tl.store(out_ptr11 + (tl.full([XBLOCK], 0, tl.int32)), tmp47, None)
+     elif pid < num_xblocks_12:
+         pid_offset = pid - num_xblocks_11
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp48 = tl.load(in_ptr12 + (0))
+         tmp49 = tl.broadcast_to(tmp48, [XBLOCK])
+         tmp50 = 1.0
+         tmp51 = tmp49 + tmp50
+         tl.store(out_ptr12 + (tl.full([XBLOCK], 0, tl.int32)), tmp51, None)
+     elif pid < num_xblocks_13:
+         pid_offset = pid - num_xblocks_12
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp52 = tl.load(in_ptr13 + (0))
+         tmp53 = tl.broadcast_to(tmp52, [XBLOCK])
+         tmp54 = 1.0
+         tmp55 = tmp53 + tmp54
+         tl.store(out_ptr13 + (tl.full([XBLOCK], 0, tl.int32)), tmp55, None)
+     elif pid < num_xblocks_14:
+         pid_offset = pid - num_xblocks_13
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp56 = tl.load(in_ptr14 + (0))
+         tmp57 = tl.broadcast_to(tmp56, [XBLOCK])
+         tmp58 = 1.0
+         tmp59 = tmp57 + tmp58
+         tl.store(out_ptr14 + (tl.full([XBLOCK], 0, tl.int32)), tmp59, None)
+     elif pid < num_xblocks_15:
+         pid_offset = pid - num_xblocks_14
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp60 = tl.load(in_ptr15 + (0))
+         tmp61 = tl.broadcast_to(tmp60, [XBLOCK])
+         tmp62 = 1.0
+         tmp63 = tmp61 + tmp62
+         tl.store(out_ptr15 + (tl.full([XBLOCK], 0, tl.int32)), tmp63, None)
+     elif pid < num_xblocks_16:
+         pid_offset = pid - num_xblocks_15
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp64 = tl.load(in_ptr16 + (0))
+         tmp65 = tl.broadcast_to(tmp64, [XBLOCK])
+         tmp66 = 1.0
+         tmp67 = tmp65 + tmp66
+         tl.store(out_ptr16 + (tl.full([XBLOCK], 0, tl.int32)), tmp67, None)
+     elif pid < num_xblocks_17:
+         pid_offset = pid - num_xblocks_16
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp68 = tl.load(in_ptr17 + (0))
+         tmp69 = tl.broadcast_to(tmp68, [XBLOCK])
+         tmp70 = 1.0
+         tmp71 = tmp69 + tmp70
+         tl.store(out_ptr17 + (tl.full([XBLOCK], 0, tl.int32)), tmp71, None)
+     elif pid < num_xblocks_18:
+         pid_offset = pid - num_xblocks_17
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp72 = tl.load(in_ptr18 + (0))
+         tmp73 = tl.broadcast_to(tmp72, [XBLOCK])
+         tmp74 = 1.0
+         tmp75 = tmp73 + tmp74
+         tl.store(out_ptr18 + (tl.full([XBLOCK], 0, tl.int32)), tmp75, None)
+     elif pid < num_xblocks_19:
+         pid_offset = pid - num_xblocks_18
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp76 = tl.load(in_ptr19 + (0))
+         tmp77 = tl.broadcast_to(tmp76, [XBLOCK])
+         tmp78 = 1.0
+         tmp79 = tmp77 + tmp78
+         tl.store(out_ptr19 + (tl.full([XBLOCK], 0, tl.int32)), tmp79, None)
+     elif pid < num_xblocks_20:
+         pid_offset = pid - num_xblocks_19
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp80 = tl.load(in_ptr20 + (0))
+         tmp81 = tl.broadcast_to(tmp80, [XBLOCK])
+         tmp82 = 1.0
+         tmp83 = tmp81 + tmp82
+         tl.store(out_ptr20 + (tl.full([XBLOCK], 0, tl.int32)), tmp83, None)
+     elif pid < num_xblocks_21:
+         pid_offset = pid - num_xblocks_20
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp84 = tl.load(in_ptr21 + (0))
+         tmp85 = tl.broadcast_to(tmp84, [XBLOCK])
+         tmp86 = 1.0
+         tmp87 = tmp85 + tmp86
+         tl.store(out_ptr21 + (tl.full([XBLOCK], 0, tl.int32)), tmp87, None)
+     elif pid < num_xblocks_22:
+         pid_offset = pid - num_xblocks_21
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp88 = tl.load(in_ptr22 + (0))
+         tmp89 = tl.broadcast_to(tmp88, [XBLOCK])
+         tmp90 = 1.0
+         tmp91 = tmp89 + tmp90
+         tl.store(out_ptr22 + (tl.full([XBLOCK], 0, tl.int32)), tmp91, None)
+     elif pid < num_xblocks_23:
+         pid_offset = pid - num_xblocks_22
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp92 = tl.load(in_ptr23 + (0))
+         tmp93 = tl.broadcast_to(tmp92, [XBLOCK])
+         tmp94 = 1.0
+         tmp95 = tmp93 + tmp94
+         tl.store(out_ptr23 + (tl.full([XBLOCK], 0, tl.int32)), tmp95, None)
+     elif pid < num_xblocks_24:
+         pid_offset = pid - num_xblocks_23
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp96 = tl.load(in_ptr24 + (0))
+         tmp97 = tl.broadcast_to(tmp96, [XBLOCK])
+         tmp98 = 1.0
+         tmp99 = tmp97 + tmp98
+         tl.store(out_ptr24 + (tl.full([XBLOCK], 0, tl.int32)), tmp99, None)
+     elif pid < num_xblocks_25:
+         pid_offset = pid - num_xblocks_24
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp100 = tl.load(in_ptr25 + (0))
+         tmp101 = tl.broadcast_to(tmp100, [XBLOCK])
+         tmp102 = 1.0
+         tmp103 = tmp101 + tmp102
+         tl.store(out_ptr25 + (tl.full([XBLOCK], 0, tl.int32)), tmp103, None)
+     elif pid < num_xblocks_26:
+         pid_offset = pid - num_xblocks_25
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp104 = tl.load(in_ptr26 + (0))
+         tmp105 = tl.broadcast_to(tmp104, [XBLOCK])
+         tmp106 = 1.0
+         tmp107 = tmp105 + tmp106
+         tl.store(out_ptr26 + (tl.full([XBLOCK], 0, tl.int32)), tmp107, None)
+     elif pid < num_xblocks_27:
+         pid_offset = pid - num_xblocks_26
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp108 = tl.load(in_ptr27 + (0))
+         tmp109 = tl.broadcast_to(tmp108, [XBLOCK])
+         tmp110 = 1.0
+         tmp111 = tmp109 + tmp110
+         tl.store(out_ptr27 + (tl.full([XBLOCK], 0, tl.int32)), tmp111, None)
+     elif pid < num_xblocks_28:
+         pid_offset = pid - num_xblocks_27
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp112 = tl.load(in_ptr28 + (0))
+         tmp113 = tl.broadcast_to(tmp112, [XBLOCK])
+         tmp114 = 1.0
+         tmp115 = tmp113 + tmp114
+         tl.store(out_ptr28 + (tl.full([XBLOCK], 0, tl.int32)), tmp115, None)
+     elif pid < num_xblocks_29:
+         pid_offset = pid - num_xblocks_28
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp116 = tl.load(in_ptr29 + (0))
+         tmp117 = tl.broadcast_to(tmp116, [XBLOCK])
+         tmp118 = 1.0
+         tmp119 = tmp117 + tmp118
+         tl.store(out_ptr29 + (tl.full([XBLOCK], 0, tl.int32)), tmp119, None)
+     elif pid < num_xblocks_30:
+         pid_offset = pid - num_xblocks_29
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp120 = tl.load(in_ptr30 + (0))
+         tmp121 = tl.broadcast_to(tmp120, [XBLOCK])
+         tmp122 = 1.0
+         tmp123 = tmp121 + tmp122
+         tl.store(out_ptr30 + (tl.full([XBLOCK], 0, tl.int32)), tmp123, None)
+     elif pid < num_xblocks_31:
+         pid_offset = pid - num_xblocks_30
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp124 = tl.load(in_ptr31 + (0))
+         tmp125 = tl.broadcast_to(tmp124, [XBLOCK])
+         tmp126 = 1.0
+         tmp127 = tmp125 + tmp126
+         tl.store(out_ptr31 + (tl.full([XBLOCK], 0, tl.int32)), tmp127, None)
+     elif pid < num_xblocks_32:
+         pid_offset = pid - num_xblocks_31
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp128 = tl.load(in_ptr32 + (0))
+         tmp129 = tl.broadcast_to(tmp128, [XBLOCK])
+         tmp130 = 1.0
+         tmp131 = tmp129 + tmp130
+         tl.store(out_ptr32 + (tl.full([XBLOCK], 0, tl.int32)), tmp131, None)
+     elif pid < num_xblocks_33:
+         pid_offset = pid - num_xblocks_32
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp132 = tl.load(in_ptr33 + (0))
+         tmp133 = tl.broadcast_to(tmp132, [XBLOCK])
+         tmp134 = 1.0
+         tmp135 = tmp133 + tmp134
+         tl.store(out_ptr33 + (tl.full([XBLOCK], 0, tl.int32)), tmp135, None)
+     elif pid < num_xblocks_34:
+         pid_offset = pid - num_xblocks_33
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp136 = tl.load(in_ptr34 + (0))
+         tmp137 = tl.broadcast_to(tmp136, [XBLOCK])
+         tmp138 = 1.0
+         tmp139 = tmp137 + tmp138
+         tl.store(out_ptr34 + (tl.full([XBLOCK], 0, tl.int32)), tmp139, None)
+     elif pid < num_xblocks_35:
+         pid_offset = pid - num_xblocks_34
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp140 = tl.load(in_ptr35 + (0))
+         tmp141 = tl.broadcast_to(tmp140, [XBLOCK])
+         tmp142 = 1.0
+         tmp143 = tmp141 + tmp142
+         tl.store(out_ptr35 + (tl.full([XBLOCK], 0, tl.int32)), tmp143, None)
+     elif pid < num_xblocks_36:
+         pid_offset = pid - num_xblocks_35
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp144 = tl.load(in_ptr36 + (0))
+         tmp145 = tl.broadcast_to(tmp144, [XBLOCK])
+         tmp146 = 1.0
+         tmp147 = tmp145 + tmp146
+         tl.store(out_ptr36 + (tl.full([XBLOCK], 0, tl.int32)), tmp147, None)
+     elif pid < num_xblocks_37:
+         pid_offset = pid - num_xblocks_36
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp148 = tl.load(in_ptr37 + (0))
+         tmp149 = tl.broadcast_to(tmp148, [XBLOCK])
+         tmp150 = 1.0
+         tmp151 = tmp149 + tmp150
+         tl.store(out_ptr37 + (tl.full([XBLOCK], 0, tl.int32)), tmp151, None)
+     elif pid < num_xblocks_38:
+         pid_offset = pid - num_xblocks_37
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp152 = tl.load(in_ptr38 + (0))
+         tmp153 = tl.broadcast_to(tmp152, [XBLOCK])
+         tmp154 = 1.0
+         tmp155 = tmp153 + tmp154
+         tl.store(out_ptr38 + (tl.full([XBLOCK], 0, tl.int32)), tmp155, None)
+     elif pid < num_xblocks_39:
+         pid_offset = pid - num_xblocks_38
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp156 = tl.load(in_ptr39 + (0))
+         tmp157 = tl.broadcast_to(tmp156, [XBLOCK])
+         tmp158 = 1.0
+         tmp159 = tmp157 + tmp158
+         tl.store(out_ptr39 + (tl.full([XBLOCK], 0, tl.int32)), tmp159, None)
+     elif pid < num_xblocks_40:
+         pid_offset = pid - num_xblocks_39
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp160 = tl.load(in_ptr40 + (0))
+         tmp161 = tl.broadcast_to(tmp160, [XBLOCK])
+         tmp162 = 1.0
+         tmp163 = tmp161 + tmp162
+         tl.store(out_ptr40 + (tl.full([XBLOCK], 0, tl.int32)), tmp163, None)
+     elif pid < num_xblocks_41:
+         pid_offset = pid - num_xblocks_40
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp164 = tl.load(in_ptr41 + (0))
+         tmp165 = tl.broadcast_to(tmp164, [XBLOCK])
+         tmp166 = 1.0
+         tmp167 = tmp165 + tmp166
+         tl.store(out_ptr41 + (tl.full([XBLOCK], 0, tl.int32)), tmp167, None)
+     elif pid < num_xblocks_42:
+         pid_offset = pid - num_xblocks_41
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp168 = tl.load(in_ptr42 + (0))
+         tmp169 = tl.broadcast_to(tmp168, [XBLOCK])
+         tmp170 = 1.0
+         tmp171 = tmp169 + tmp170
+         tl.store(out_ptr42 + (tl.full([XBLOCK], 0, tl.int32)), tmp171, None)
+     elif pid < num_xblocks_43:
+         pid_offset = pid - num_xblocks_42
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp172 = tl.load(in_ptr43 + (0))
+         tmp173 = tl.broadcast_to(tmp172, [XBLOCK])
+         tmp174 = 1.0
+         tmp175 = tmp173 + tmp174
+         tl.store(out_ptr43 + (tl.full([XBLOCK], 0, tl.int32)), tmp175, None)
+     elif pid < num_xblocks_44:
+         pid_offset = pid - num_xblocks_43
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp176 = tl.load(in_ptr44 + (0))
+         tmp177 = tl.broadcast_to(tmp176, [XBLOCK])
+         tmp178 = 1.0
+         tmp179 = tmp177 + tmp178
+         tl.store(out_ptr44 + (tl.full([XBLOCK], 0, tl.int32)), tmp179, None)
+     elif pid < num_xblocks_45:
+         pid_offset = pid - num_xblocks_44
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp180 = tl.load(in_ptr45 + (0))
+         tmp181 = tl.broadcast_to(tmp180, [XBLOCK])
+         tmp182 = 1.0
+         tmp183 = tmp181 + tmp182
+         tl.store(out_ptr45 + (tl.full([XBLOCK], 0, tl.int32)), tmp183, None)
+     elif pid < num_xblocks_46:
+         pid_offset = pid - num_xblocks_45
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp184 = tl.load(in_ptr46 + (0))
+         tmp185 = tl.broadcast_to(tmp184, [XBLOCK])
+         tmp186 = 1.0
+         tmp187 = tmp185 + tmp186
+         tl.store(out_ptr46 + (tl.full([XBLOCK], 0, tl.int32)), tmp187, None)
+     elif pid < num_xblocks_47:
+         pid_offset = pid - num_xblocks_46
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp188 = tl.load(in_ptr47 + (0))
+         tmp189 = tl.broadcast_to(tmp188, [XBLOCK])
+         tmp190 = 1.0
+         tmp191 = tmp189 + tmp190
+         tl.store(out_ptr47 + (tl.full([XBLOCK], 0, tl.int32)), tmp191, None)
+     elif pid < num_xblocks_48:
+         pid_offset = pid - num_xblocks_47
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp192 = tl.load(in_ptr48 + (0))
+         tmp193 = tl.broadcast_to(tmp192, [XBLOCK])
+         tmp194 = 1.0
+         tmp195 = tmp193 + tmp194
+         tl.store(out_ptr48 + (tl.full([XBLOCK], 0, tl.int32)), tmp195, None)
+     elif pid < num_xblocks_49:
+         pid_offset = pid - num_xblocks_48
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp196 = tl.load(in_ptr49 + (0))
+         tmp197 = tl.broadcast_to(tmp196, [XBLOCK])
+         tmp198 = 1.0
+         tmp199 = tmp197 + tmp198
+         tl.store(out_ptr49 + (tl.full([XBLOCK], 0, tl.int32)), tmp199, None)
+     elif pid < num_xblocks_50:
+         pid_offset = pid - num_xblocks_49
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp200 = tl.load(in_ptr50 + (0))
+         tmp201 = tl.broadcast_to(tmp200, [XBLOCK])
+         tmp202 = 1.0
+         tmp203 = tmp201 + tmp202
+         tl.store(out_ptr50 + (tl.full([XBLOCK], 0, tl.int32)), tmp203, None)
+     elif pid < num_xblocks_51:
+         pid_offset = pid - num_xblocks_50
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp204 = tl.load(in_ptr51 + (0))
+         tmp205 = tl.broadcast_to(tmp204, [XBLOCK])
+         tmp206 = 1.0
+         tmp207 = tmp205 + tmp206
+         tl.store(out_ptr51 + (tl.full([XBLOCK], 0, tl.int32)), tmp207, None)
+     elif pid < num_xblocks_52:
+         pid_offset = pid - num_xblocks_51
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp208 = tl.load(in_ptr52 + (0))
+         tmp209 = tl.broadcast_to(tmp208, [XBLOCK])
+         tmp210 = 1.0
+         tmp211 = tmp209 + tmp210
+         tl.store(out_ptr52 + (tl.full([XBLOCK], 0, tl.int32)), tmp211, None)
+     elif pid < num_xblocks_53:
+         pid_offset = pid - num_xblocks_52
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp212 = tl.load(in_ptr53 + (0))
+         tmp213 = tl.broadcast_to(tmp212, [XBLOCK])
+         tmp214 = 1.0
+         tmp215 = tmp213 + tmp214
+         tl.store(out_ptr53 + (tl.full([XBLOCK], 0, tl.int32)), tmp215, None)
+     elif pid < num_xblocks_54:
+         pid_offset = pid - num_xblocks_53
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp216 = tl.load(in_ptr54 + (0))
+         tmp217 = tl.broadcast_to(tmp216, [XBLOCK])
+         tmp218 = 1.0
+         tmp219 = tmp217 + tmp218
+         tl.store(out_ptr54 + (tl.full([XBLOCK], 0, tl.int32)), tmp219, None)
+     elif pid < num_xblocks_55:
+         pid_offset = pid - num_xblocks_54
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp220 = tl.load(in_ptr55 + (0))
+         tmp221 = tl.broadcast_to(tmp220, [XBLOCK])
+         tmp222 = 1.0
+         tmp223 = tmp221 + tmp222
+         tl.store(out_ptr55 + (tl.full([XBLOCK], 0, tl.int32)), tmp223, None)
+     elif pid < num_xblocks_56:
+         pid_offset = pid - num_xblocks_55
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp224 = tl.load(in_ptr56 + (0))
+         tmp225 = tl.broadcast_to(tmp224, [XBLOCK])
+         tmp226 = 1.0
+         tmp227 = tmp225 + tmp226
+         tl.store(out_ptr56 + (tl.full([XBLOCK], 0, tl.int32)), tmp227, None)
+     elif pid < num_xblocks_57:
+         pid_offset = pid - num_xblocks_56
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp228 = tl.load(in_ptr57 + (0))
+         tmp229 = tl.broadcast_to(tmp228, [XBLOCK])
+         tmp230 = 1.0
+         tmp231 = tmp229 + tmp230
+         tl.store(out_ptr57 + (tl.full([XBLOCK], 0, tl.int32)), tmp231, None)
+     elif pid < num_xblocks_58:
+         pid_offset = pid - num_xblocks_57
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp232 = tl.load(in_ptr58 + (0))
+         tmp233 = tl.broadcast_to(tmp232, [XBLOCK])
+         tmp234 = 1.0
+         tmp235 = tmp233 + tmp234
+         tl.store(out_ptr58 + (tl.full([XBLOCK], 0, tl.int32)), tmp235, None)
+     elif pid < num_xblocks_59:
+         pid_offset = pid - num_xblocks_58
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp236 = tl.load(in_ptr59 + (0))
+         tmp237 = tl.broadcast_to(tmp236, [XBLOCK])
+         tmp238 = 1.0
+         tmp239 = tmp237 + tmp238
+         tl.store(out_ptr59 + (tl.full([XBLOCK], 0, tl.int32)), tmp239, None)
+     elif pid < num_xblocks_60:
+         pid_offset = pid - num_xblocks_59
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp240 = tl.load(in_ptr60 + (0))
+         tmp241 = tl.broadcast_to(tmp240, [XBLOCK])
+         tmp242 = 1.0
+         tmp243 = tmp241 + tmp242
+         tl.store(out_ptr60 + (tl.full([XBLOCK], 0, tl.int32)), tmp243, None)
+     elif pid < num_xblocks_61:
+         pid_offset = pid - num_xblocks_60
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp244 = tl.load(in_ptr61 + (0))
+         tmp245 = tl.broadcast_to(tmp244, [XBLOCK])
+         tmp246 = 1.0
+         tmp247 = tmp245 + tmp246
+         tl.store(out_ptr61 + (tl.full([XBLOCK], 0, tl.int32)), tmp247, None)
+     elif pid < num_xblocks_62:
+         pid_offset = pid - num_xblocks_61
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp248 = tl.load(in_ptr62 + (0))
+         tmp249 = tl.broadcast_to(tmp248, [XBLOCK])
+         tmp250 = 1.0
+         tmp251 = tmp249 + tmp250
+         tl.store(out_ptr62 + (tl.full([XBLOCK], 0, tl.int32)), tmp251, None)
+     elif pid < num_xblocks_63:
+         pid_offset = pid - num_xblocks_62
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp252 = tl.load(in_ptr63 + (0))
+         tmp253 = tl.broadcast_to(tmp252, [XBLOCK])
+         tmp254 = 1.0
+         tmp255 = tmp253 + tmp254
+         tl.store(out_ptr63 + (tl.full([XBLOCK], 0, tl.int32)), tmp255, None)
+     elif pid < num_xblocks_64:
+         pid_offset = pid - num_xblocks_63
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp256 = tl.load(in_ptr64 + (0))
+         tmp257 = tl.broadcast_to(tmp256, [XBLOCK])
+         tmp258 = 1.0
+         tmp259 = tmp257 + tmp258
+         tl.store(out_ptr64 + (tl.full([XBLOCK], 0, tl.int32)), tmp259, None)
+     elif pid < num_xblocks_65:
+         pid_offset = pid - num_xblocks_64
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp260 = tl.load(in_ptr65 + (0))
+         tmp261 = tl.broadcast_to(tmp260, [XBLOCK])
+         tmp262 = 1.0
+         tmp263 = tmp261 + tmp262
+         tl.store(out_ptr65 + (tl.full([XBLOCK], 0, tl.int32)), tmp263, None)
+     elif pid < num_xblocks_66:
+         pid_offset = pid - num_xblocks_65
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp264 = tl.load(in_ptr66 + (0))
+         tmp265 = tl.broadcast_to(tmp264, [XBLOCK])
+         tmp266 = 1.0
+         tmp267 = tmp265 + tmp266
+         tl.store(out_ptr66 + (tl.full([XBLOCK], 0, tl.int32)), tmp267, None)
+     elif pid < num_xblocks_67:
+         pid_offset = pid - num_xblocks_66
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp268 = tl.load(in_ptr67 + (0))
+         tmp269 = tl.broadcast_to(tmp268, [XBLOCK])
+         tmp270 = 1.0
+         tmp271 = tmp269 + tmp270
+         tl.store(out_ptr67 + (tl.full([XBLOCK], 0, tl.int32)), tmp271, None)
+     elif pid < num_xblocks_68:
+         pid_offset = pid - num_xblocks_67
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp272 = tl.load(in_ptr68 + (0))
+         tmp273 = tl.broadcast_to(tmp272, [XBLOCK])
+         tmp274 = 1.0
+         tmp275 = tmp273 + tmp274
+         tl.store(out_ptr68 + (tl.full([XBLOCK], 0, tl.int32)), tmp275, None)
+     elif pid < num_xblocks_69:
+         pid_offset = pid - num_xblocks_68
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp276 = tl.load(in_ptr69 + (0))
+         tmp277 = tl.broadcast_to(tmp276, [XBLOCK])
+         tmp278 = 1.0
+         tmp279 = tmp277 + tmp278
+         tl.store(out_ptr69 + (tl.full([XBLOCK], 0, tl.int32)), tmp279, None)
+     elif pid < num_xblocks_70:
+         pid_offset = pid - num_xblocks_69
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp280 = tl.load(in_ptr70 + (0))
+         tmp281 = tl.broadcast_to(tmp280, [XBLOCK])
+         tmp282 = 1.0
+         tmp283 = tmp281 + tmp282
+         tl.store(out_ptr70 + (tl.full([XBLOCK], 0, tl.int32)), tmp283, None)
+     elif pid < num_xblocks_71:
+         pid_offset = pid - num_xblocks_70
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp284 = tl.load(in_ptr71 + (0))
+         tmp285 = tl.broadcast_to(tmp284, [XBLOCK])
+         tmp286 = 1.0
+         tmp287 = tmp285 + tmp286
+         tl.store(out_ptr71 + (tl.full([XBLOCK], 0, tl.int32)), tmp287, None)
+     elif pid < num_xblocks_72:
+         pid_offset = pid - num_xblocks_71
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp288 = tl.load(in_ptr72 + (0))
+         tmp289 = tl.broadcast_to(tmp288, [XBLOCK])
+         tmp290 = 1.0
+         tmp291 = tmp289 + tmp290
+         tl.store(out_ptr72 + (tl.full([XBLOCK], 0, tl.int32)), tmp291, None)
+     elif pid < num_xblocks_73:
+         pid_offset = pid - num_xblocks_72
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp292 = tl.load(in_ptr73 + (0))
+         tmp293 = tl.broadcast_to(tmp292, [XBLOCK])
+         tmp294 = 1.0
+         tmp295 = tmp293 + tmp294
+         tl.store(out_ptr73 + (tl.full([XBLOCK], 0, tl.int32)), tmp295, None)
+     elif pid < num_xblocks_74:
+         pid_offset = pid - num_xblocks_73
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp296 = tl.load(in_ptr74 + (0))
+         tmp297 = tl.broadcast_to(tmp296, [XBLOCK])
+         tmp298 = 1.0
+         tmp299 = tmp297 + tmp298
+         tl.store(out_ptr74 + (tl.full([XBLOCK], 0, tl.int32)), tmp299, None)
+     elif pid < num_xblocks_75:
+         pid_offset = pid - num_xblocks_74
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp300 = tl.load(in_ptr75 + (0))
+         tmp301 = tl.broadcast_to(tmp300, [XBLOCK])
+         tmp302 = 1.0
+         tmp303 = tmp301 + tmp302
+         tl.store(out_ptr75 + (tl.full([XBLOCK], 0, tl.int32)), tmp303, None)
+     elif pid < num_xblocks_76:
+         pid_offset = pid - num_xblocks_75
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp304 = tl.load(in_ptr76 + (0))
+         tmp305 = tl.broadcast_to(tmp304, [XBLOCK])
+         tmp306 = 1.0
+         tmp307 = tmp305 + tmp306
+         tl.store(out_ptr76 + (tl.full([XBLOCK], 0, tl.int32)), tmp307, None)
+     elif pid < num_xblocks_77:
+         pid_offset = pid - num_xblocks_76
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp308 = tl.load(in_ptr77 + (0))
+         tmp309 = tl.broadcast_to(tmp308, [XBLOCK])
+         tmp310 = 1.0
+         tmp311 = tmp309 + tmp310
+         tl.store(out_ptr77 + (tl.full([XBLOCK], 0, tl.int32)), tmp311, None)
+     elif pid < num_xblocks_78:
+         pid_offset = pid - num_xblocks_77
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp312 = tl.load(in_ptr78 + (0))
+         tmp313 = tl.broadcast_to(tmp312, [XBLOCK])
+         tmp314 = 1.0
+         tmp315 = tmp313 + tmp314
+         tl.store(out_ptr78 + (tl.full([XBLOCK], 0, tl.int32)), tmp315, None)
+     elif pid < num_xblocks_79:
+         pid_offset = pid - num_xblocks_78
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp316 = tl.load(in_ptr79 + (0))
+         tmp317 = tl.broadcast_to(tmp316, [XBLOCK])
+         tmp318 = 1.0
+         tmp319 = tmp317 + tmp318
+         tl.store(out_ptr79 + (tl.full([XBLOCK], 0, tl.int32)), tmp319, None)
+     elif pid < num_xblocks_80:
+         pid_offset = pid - num_xblocks_79
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp320 = tl.load(in_ptr80 + (0))
+         tmp321 = tl.broadcast_to(tmp320, [XBLOCK])
+         tmp322 = 1.0
+         tmp323 = tmp321 + tmp322
+         tl.store(out_ptr80 + (tl.full([XBLOCK], 0, tl.int32)), tmp323, None)
+     elif pid < num_xblocks_81:
+         pid_offset = pid - num_xblocks_80
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp324 = tl.load(in_ptr81 + (0))
+         tmp325 = tl.broadcast_to(tmp324, [XBLOCK])
+         tmp326 = 1.0
+         tmp327 = tmp325 + tmp326
+         tl.store(out_ptr81 + (tl.full([XBLOCK], 0, tl.int32)), tmp327, None)
+     elif pid < num_xblocks_82:
+         pid_offset = pid - num_xblocks_81
+         xnumel = 1
+         rnumel = 1
+         xoffset = pid_offset * XBLOCK
+         xindex = xoffset + tl.arange(0, XBLOCK)[:]
+         xmask = tl.full([XBLOCK], True, tl.int1)
+         tmp328 = tl.load(in_ptr82 + (0))
+         tmp329 = tl.broadcast_to(tmp328, [XBLOCK])
+         tmp330 = 1.0
+         tmp331 = tmp329 + tmp330
+         tl.store(out_ptr82 + (tl.full([XBLOCK], 0, tl.int32)), tmp331, None)
+     else:
+         pass
+ ''', device_str='cuda')
+ from torch._C import _cuda_getCurrentRawStream as get_raw_stream
+
+ import triton
+ import triton.language as tl
+ from torch._inductor.runtime.triton_heuristics import grid, split_scan_grid, grid_combo_kernels, start_graph, end_graph
+
+
+ # kernel path: /tmp/tmp2ln889l5/qy/cqyob3vusnsr6m3lyzdsbv63hei53t3lq7sf2utuh2x5menn4h2w.py
+ # Source Nodes: [], Original ATen: []
+
+ triton_for_fused_1 = async_compile.triton('triton_', '''
+ import triton
+ import triton.language as tl
+ from triton.compiler.compiler import AttrsDescriptor
+
+ from torch._inductor.runtime import triton_helpers, triton_heuristics
+ from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+ from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties
+
+ @triton_heuristics.foreach(
+     num_warps=8,
+     triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), equal_to_1=())]},
+     inductor_meta={'kernel_name': 'triton_for_fused_1', 'mutated_arg_names': ['in_ptr0', 'in_ptr1', 'in_ptr10', 'in_ptr11', 'in_ptr12', 'in_ptr13', 'in_ptr14', 'in_ptr15', 'in_ptr16', 'in_ptr17', 'in_ptr18', 'in_ptr19', 'in_ptr2', 'in_ptr20', 'in_ptr21', 'in_ptr22', 'in_ptr23', 'in_ptr24', 'in_ptr25', 'in_ptr26', 'in_ptr27', 'in_ptr28', 'in_ptr29', 'in_ptr3', 'in_ptr30', 'in_ptr31', 'in_ptr32', 'in_ptr33', 'in_ptr34', 'in_ptr35', 'in_ptr36', 'in_ptr37', 'in_ptr38', 'in_ptr39', 'in_ptr4', 'in_ptr40', 'in_ptr41', 'in_ptr42', 'in_ptr43', 'in_ptr44', 'in_ptr45', 'in_ptr46', 'in_ptr47', 'in_ptr48', 'in_ptr49', 'in_ptr5', 'in_ptr50', 'in_ptr51', 'in_ptr52', 'in_ptr53', 'in_ptr54', 'in_ptr55', 'in_ptr56', 'in_ptr57', 'in_ptr58', 'in_ptr59', 'in_ptr6', 'in_ptr60', 'in_ptr61', 'in_ptr62', 'in_ptr63', 'in_ptr64', 'in_ptr7', 'in_ptr8', 'in_ptr9', 'out_ptr0', 'out_ptr1', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr13', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr17', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr21', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr25', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr29', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr33', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr37', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr41', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr45', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr49', 'out_ptr5', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr53', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr57', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr61', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr7', 'out_ptr8', 'out_ptr9'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+ )
+ @triton.jit
+ def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, out_ptr0, out_ptr1, out_ptr2, out_ptr3, out_ptr4, out_ptr5, out_ptr6, out_ptr7, out_ptr8, out_ptr9, out_ptr10, out_ptr11, out_ptr12, out_ptr13, out_ptr14, out_ptr15, out_ptr16, out_ptr17, out_ptr18, out_ptr19, out_ptr20, out_ptr21, out_ptr22, out_ptr23, out_ptr24, out_ptr25, out_ptr26, out_ptr27, out_ptr28, out_ptr29, out_ptr30, out_ptr31, out_ptr32, out_ptr33, out_ptr34, out_ptr35, out_ptr36, out_ptr37, out_ptr38, out_ptr39, out_ptr40, out_ptr41, out_ptr42, out_ptr43, out_ptr44, out_ptr45, out_ptr46, out_ptr47, out_ptr48, out_ptr49, out_ptr50, out_ptr51, out_ptr52, out_ptr53, out_ptr54, out_ptr55, out_ptr56, out_ptr57, out_ptr58, out_ptr59, out_ptr60, out_ptr61, out_ptr62, out_ptr63, out_ptr64):
+     pid = tl.program_id(0)
+     XBLOCK: tl.constexpr = 1024
+     num_xblocks_0 = tl.cdiv(1, XBLOCK)
+     num_xblocks_1 = num_xblocks_0 + tl.cdiv(1, XBLOCK)
+     num_xblocks_2 = num_xblocks_1 + tl.cdiv(1, XBLOCK)
+     num_xblocks_3 = num_xblocks_2 + tl.cdiv(1, XBLOCK)
+     num_xblocks_4 = num_xblocks_3 + tl.cdiv(1, XBLOCK)
+     num_xblocks_5 = num_xblocks_4 + tl.cdiv(1, XBLOCK)
+     num_xblocks_6 = num_xblocks_5 + tl.cdiv(1, XBLOCK)
+     num_xblocks_7 = num_xblocks_6 + tl.cdiv(1, XBLOCK)
+     num_xblocks_8 = num_xblocks_7 + tl.cdiv(1, XBLOCK)
+     num_xblocks_9 = num_xblocks_8 + tl.cdiv(1, XBLOCK)
+     num_xblocks_10 = num_xblocks_9 + tl.cdiv(1, XBLOCK)
+     num_xblocks_11 = num_xblocks_10 + tl.cdiv(1, XBLOCK)
+     num_xblocks_12 = num_xblocks_11 + tl.cdiv(1, XBLOCK)
+     num_xblocks_13 = num_xblocks_12 + tl.cdiv(1, XBLOCK)
+     num_xblocks_14 = num_xblocks_13 + tl.cdiv(1, XBLOCK)
tl.cdiv(1, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(1, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(1, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(1, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(1, XBLOCK) + num_xblocks_19 = num_xblocks_18 + tl.cdiv(1, XBLOCK) + num_xblocks_20 = num_xblocks_19 + tl.cdiv(1, XBLOCK) + num_xblocks_21 = num_xblocks_20 + tl.cdiv(1, XBLOCK) + num_xblocks_22 = num_xblocks_21 + tl.cdiv(1, XBLOCK) + num_xblocks_23 = num_xblocks_22 + tl.cdiv(1, XBLOCK) + num_xblocks_24 = num_xblocks_23 + tl.cdiv(1, XBLOCK) + num_xblocks_25 = num_xblocks_24 + tl.cdiv(1, XBLOCK) + num_xblocks_26 = num_xblocks_25 + tl.cdiv(1, XBLOCK) + num_xblocks_27 = num_xblocks_26 + tl.cdiv(1, XBLOCK) + num_xblocks_28 = num_xblocks_27 + tl.cdiv(1, XBLOCK) + num_xblocks_29 = num_xblocks_28 + tl.cdiv(1, XBLOCK) + num_xblocks_30 = num_xblocks_29 + tl.cdiv(1, XBLOCK) + num_xblocks_31 = num_xblocks_30 + tl.cdiv(1, XBLOCK) + num_xblocks_32 = num_xblocks_31 + tl.cdiv(1, XBLOCK) + num_xblocks_33 = num_xblocks_32 + tl.cdiv(1, XBLOCK) + num_xblocks_34 = num_xblocks_33 + tl.cdiv(1, XBLOCK) + num_xblocks_35 = num_xblocks_34 + tl.cdiv(1, XBLOCK) + num_xblocks_36 = num_xblocks_35 + tl.cdiv(1, XBLOCK) + num_xblocks_37 = num_xblocks_36 + tl.cdiv(1, XBLOCK) + num_xblocks_38 = num_xblocks_37 + tl.cdiv(1, XBLOCK) + num_xblocks_39 = num_xblocks_38 + tl.cdiv(1, XBLOCK) + num_xblocks_40 = num_xblocks_39 + tl.cdiv(1, XBLOCK) + num_xblocks_41 = num_xblocks_40 + tl.cdiv(1, XBLOCK) + num_xblocks_42 = num_xblocks_41 + tl.cdiv(1, XBLOCK) + num_xblocks_43 = num_xblocks_42 + tl.cdiv(1, XBLOCK) + num_xblocks_44 = num_xblocks_43 + tl.cdiv(1, XBLOCK) + num_xblocks_45 = num_xblocks_44 + tl.cdiv(1, XBLOCK) + num_xblocks_46 = num_xblocks_45 + tl.cdiv(1, XBLOCK) + num_xblocks_47 = num_xblocks_46 + tl.cdiv(1, XBLOCK) + num_xblocks_48 = num_xblocks_47 + tl.cdiv(1, XBLOCK) + num_xblocks_49 = num_xblocks_48 + tl.cdiv(1, XBLOCK) + num_xblocks_50 = num_xblocks_49 + tl.cdiv(1, XBLOCK) + num_xblocks_51 = num_xblocks_50 + tl.cdiv(1, XBLOCK) + num_xblocks_52 = num_xblocks_51 + tl.cdiv(1, XBLOCK) + num_xblocks_53 = num_xblocks_52 + tl.cdiv(1, XBLOCK) + num_xblocks_54 = num_xblocks_53 + tl.cdiv(1, XBLOCK) + num_xblocks_55 = num_xblocks_54 + tl.cdiv(1, XBLOCK) + num_xblocks_56 = num_xblocks_55 + tl.cdiv(1, XBLOCK) + num_xblocks_57 = num_xblocks_56 + tl.cdiv(1, XBLOCK) + num_xblocks_58 = num_xblocks_57 + tl.cdiv(1, XBLOCK) + num_xblocks_59 = num_xblocks_58 + tl.cdiv(1, XBLOCK) + num_xblocks_60 = num_xblocks_59 + tl.cdiv(1, XBLOCK) + num_xblocks_61 = num_xblocks_60 + tl.cdiv(1, XBLOCK) + num_xblocks_62 = num_xblocks_61 + tl.cdiv(1, XBLOCK) + num_xblocks_63 = num_xblocks_62 + tl.cdiv(1, XBLOCK) + num_xblocks_64 = num_xblocks_63 + tl.cdiv(1, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp0 = tl.load(in_ptr0 + (0)) + tmp1 = tl.broadcast_to(tmp0, [XBLOCK]) + tmp2 = 1.0 + tmp3 = tmp1 + tmp2 + tl.store(out_ptr0 + (tl.full([XBLOCK], 0, tl.int32)), tmp3, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp4 = tl.load(in_ptr1 + (0)) + tmp5 = tl.broadcast_to(tmp4, [XBLOCK]) + tmp6 = 1.0 + tmp7 = tmp5 + tmp6 + tl.store(out_ptr1 + (tl.full([XBLOCK], 0, tl.int32)), tmp7, None) + elif pid < num_xblocks_2: + 
pid_offset = pid - num_xblocks_1 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp8 = tl.load(in_ptr2 + (0)) + tmp9 = tl.broadcast_to(tmp8, [XBLOCK]) + tmp10 = 1.0 + tmp11 = tmp9 + tmp10 + tl.store(out_ptr2 + (tl.full([XBLOCK], 0, tl.int32)), tmp11, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp12 = tl.load(in_ptr3 + (0)) + tmp13 = tl.broadcast_to(tmp12, [XBLOCK]) + tmp14 = 1.0 + tmp15 = tmp13 + tmp14 + tl.store(out_ptr3 + (tl.full([XBLOCK], 0, tl.int32)), tmp15, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp16 = tl.load(in_ptr4 + (0)) + tmp17 = tl.broadcast_to(tmp16, [XBLOCK]) + tmp18 = 1.0 + tmp19 = tmp17 + tmp18 + tl.store(out_ptr4 + (tl.full([XBLOCK], 0, tl.int32)), tmp19, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp20 = tl.load(in_ptr5 + (0)) + tmp21 = tl.broadcast_to(tmp20, [XBLOCK]) + tmp22 = 1.0 + tmp23 = tmp21 + tmp22 + tl.store(out_ptr5 + (tl.full([XBLOCK], 0, tl.int32)), tmp23, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp24 = tl.load(in_ptr6 + (0)) + tmp25 = tl.broadcast_to(tmp24, [XBLOCK]) + tmp26 = 1.0 + tmp27 = tmp25 + tmp26 + tl.store(out_ptr6 + (tl.full([XBLOCK], 0, tl.int32)), tmp27, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp28 = tl.load(in_ptr7 + (0)) + tmp29 = tl.broadcast_to(tmp28, [XBLOCK]) + tmp30 = 1.0 + tmp31 = tmp29 + tmp30 + tl.store(out_ptr7 + (tl.full([XBLOCK], 0, tl.int32)), tmp31, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp32 = tl.load(in_ptr8 + (0)) + tmp33 = tl.broadcast_to(tmp32, [XBLOCK]) + tmp34 = 1.0 + tmp35 = tmp33 + tmp34 + tl.store(out_ptr8 + (tl.full([XBLOCK], 0, tl.int32)), tmp35, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp36 = tl.load(in_ptr9 + (0)) + tmp37 = tl.broadcast_to(tmp36, [XBLOCK]) + tmp38 = 1.0 + tmp39 = tmp37 + tmp38 + tl.store(out_ptr9 + (tl.full([XBLOCK], 0, tl.int32)), tmp39, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp40 = tl.load(in_ptr10 + (0)) + tmp41 = tl.broadcast_to(tmp40, [XBLOCK]) + tmp42 = 1.0 + tmp43 = tmp41 + tmp42 + tl.store(out_ptr10 + (tl.full([XBLOCK], 0, tl.int32)), tmp43, None) + elif pid < num_xblocks_11: + pid_offset = pid - 
num_xblocks_10 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp44 = tl.load(in_ptr11 + (0)) + tmp45 = tl.broadcast_to(tmp44, [XBLOCK]) + tmp46 = 1.0 + tmp47 = tmp45 + tmp46 + tl.store(out_ptr11 + (tl.full([XBLOCK], 0, tl.int32)), tmp47, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp48 = tl.load(in_ptr12 + (0)) + tmp49 = tl.broadcast_to(tmp48, [XBLOCK]) + tmp50 = 1.0 + tmp51 = tmp49 + tmp50 + tl.store(out_ptr12 + (tl.full([XBLOCK], 0, tl.int32)), tmp51, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp52 = tl.load(in_ptr13 + (0)) + tmp53 = tl.broadcast_to(tmp52, [XBLOCK]) + tmp54 = 1.0 + tmp55 = tmp53 + tmp54 + tl.store(out_ptr13 + (tl.full([XBLOCK], 0, tl.int32)), tmp55, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp56 = tl.load(in_ptr14 + (0)) + tmp57 = tl.broadcast_to(tmp56, [XBLOCK]) + tmp58 = 1.0 + tmp59 = tmp57 + tmp58 + tl.store(out_ptr14 + (tl.full([XBLOCK], 0, tl.int32)), tmp59, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp60 = tl.load(in_ptr15 + (0)) + tmp61 = tl.broadcast_to(tmp60, [XBLOCK]) + tmp62 = 1.0 + tmp63 = tmp61 + tmp62 + tl.store(out_ptr15 + (tl.full([XBLOCK], 0, tl.int32)), tmp63, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp64 = tl.load(in_ptr16 + (0)) + tmp65 = tl.broadcast_to(tmp64, [XBLOCK]) + tmp66 = 1.0 + tmp67 = tmp65 + tmp66 + tl.store(out_ptr16 + (tl.full([XBLOCK], 0, tl.int32)), tmp67, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp68 = tl.load(in_ptr17 + (0)) + tmp69 = tl.broadcast_to(tmp68, [XBLOCK]) + tmp70 = 1.0 + tmp71 = tmp69 + tmp70 + tl.store(out_ptr17 + (tl.full([XBLOCK], 0, tl.int32)), tmp71, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp72 = tl.load(in_ptr18 + (0)) + tmp73 = tl.broadcast_to(tmp72, [XBLOCK]) + tmp74 = 1.0 + tmp75 = tmp73 + tmp74 + tl.store(out_ptr18 + (tl.full([XBLOCK], 0, tl.int32)), tmp75, None) + elif pid < num_xblocks_19: + pid_offset = pid - num_xblocks_18 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp76 = tl.load(in_ptr19 + (0)) + tmp77 = tl.broadcast_to(tmp76, [XBLOCK]) + tmp78 = 1.0 + tmp79 = tmp77 + tmp78 + tl.store(out_ptr19 + (tl.full([XBLOCK], 0, tl.int32)), tmp79, None) + elif pid < num_xblocks_20: + 
pid_offset = pid - num_xblocks_19 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp80 = tl.load(in_ptr20 + (0)) + tmp81 = tl.broadcast_to(tmp80, [XBLOCK]) + tmp82 = 1.0 + tmp83 = tmp81 + tmp82 + tl.store(out_ptr20 + (tl.full([XBLOCK], 0, tl.int32)), tmp83, None) + elif pid < num_xblocks_21: + pid_offset = pid - num_xblocks_20 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp84 = tl.load(in_ptr21 + (0)) + tmp85 = tl.broadcast_to(tmp84, [XBLOCK]) + tmp86 = 1.0 + tmp87 = tmp85 + tmp86 + tl.store(out_ptr21 + (tl.full([XBLOCK], 0, tl.int32)), tmp87, None) + elif pid < num_xblocks_22: + pid_offset = pid - num_xblocks_21 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp88 = tl.load(in_ptr22 + (0)) + tmp89 = tl.broadcast_to(tmp88, [XBLOCK]) + tmp90 = 1.0 + tmp91 = tmp89 + tmp90 + tl.store(out_ptr22 + (tl.full([XBLOCK], 0, tl.int32)), tmp91, None) + elif pid < num_xblocks_23: + pid_offset = pid - num_xblocks_22 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp92 = tl.load(in_ptr23 + (0)) + tmp93 = tl.broadcast_to(tmp92, [XBLOCK]) + tmp94 = 1.0 + tmp95 = tmp93 + tmp94 + tl.store(out_ptr23 + (tl.full([XBLOCK], 0, tl.int32)), tmp95, None) + elif pid < num_xblocks_24: + pid_offset = pid - num_xblocks_23 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp96 = tl.load(in_ptr24 + (0)) + tmp97 = tl.broadcast_to(tmp96, [XBLOCK]) + tmp98 = 1.0 + tmp99 = tmp97 + tmp98 + tl.store(out_ptr24 + (tl.full([XBLOCK], 0, tl.int32)), tmp99, None) + elif pid < num_xblocks_25: + pid_offset = pid - num_xblocks_24 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp100 = tl.load(in_ptr25 + (0)) + tmp101 = tl.broadcast_to(tmp100, [XBLOCK]) + tmp102 = 1.0 + tmp103 = tmp101 + tmp102 + tl.store(out_ptr25 + (tl.full([XBLOCK], 0, tl.int32)), tmp103, None) + elif pid < num_xblocks_26: + pid_offset = pid - num_xblocks_25 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp104 = tl.load(in_ptr26 + (0)) + tmp105 = tl.broadcast_to(tmp104, [XBLOCK]) + tmp106 = 1.0 + tmp107 = tmp105 + tmp106 + tl.store(out_ptr26 + (tl.full([XBLOCK], 0, tl.int32)), tmp107, None) + elif pid < num_xblocks_27: + pid_offset = pid - num_xblocks_26 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp108 = tl.load(in_ptr27 + (0)) + tmp109 = tl.broadcast_to(tmp108, [XBLOCK]) + tmp110 = 1.0 + tmp111 = tmp109 + tmp110 + tl.store(out_ptr27 + (tl.full([XBLOCK], 0, tl.int32)), tmp111, None) + elif pid < num_xblocks_28: + pid_offset = pid - num_xblocks_27 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp112 = tl.load(in_ptr28 + (0)) + tmp113 = tl.broadcast_to(tmp112, [XBLOCK]) + tmp114 = 1.0 + tmp115 = tmp113 + tmp114 + tl.store(out_ptr28 + (tl.full([XBLOCK], 0, tl.int32)), 
tmp115, None) + elif pid < num_xblocks_29: + pid_offset = pid - num_xblocks_28 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp116 = tl.load(in_ptr29 + (0)) + tmp117 = tl.broadcast_to(tmp116, [XBLOCK]) + tmp118 = 1.0 + tmp119 = tmp117 + tmp118 + tl.store(out_ptr29 + (tl.full([XBLOCK], 0, tl.int32)), tmp119, None) + elif pid < num_xblocks_30: + pid_offset = pid - num_xblocks_29 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp120 = tl.load(in_ptr30 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp122 = 1.0 + tmp123 = tmp121 + tmp122 + tl.store(out_ptr30 + (tl.full([XBLOCK], 0, tl.int32)), tmp123, None) + elif pid < num_xblocks_31: + pid_offset = pid - num_xblocks_30 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp124 = tl.load(in_ptr31 + (0)) + tmp125 = tl.broadcast_to(tmp124, [XBLOCK]) + tmp126 = 1.0 + tmp127 = tmp125 + tmp126 + tl.store(out_ptr31 + (tl.full([XBLOCK], 0, tl.int32)), tmp127, None) + elif pid < num_xblocks_32: + pid_offset = pid - num_xblocks_31 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp128 = tl.load(in_ptr32 + (0)) + tmp129 = tl.broadcast_to(tmp128, [XBLOCK]) + tmp130 = 1.0 + tmp131 = tmp129 + tmp130 + tl.store(out_ptr32 + (tl.full([XBLOCK], 0, tl.int32)), tmp131, None) + elif pid < num_xblocks_33: + pid_offset = pid - num_xblocks_32 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp132 = tl.load(in_ptr33 + (0)) + tmp133 = tl.broadcast_to(tmp132, [XBLOCK]) + tmp134 = 1.0 + tmp135 = tmp133 + tmp134 + tl.store(out_ptr33 + (tl.full([XBLOCK], 0, tl.int32)), tmp135, None) + elif pid < num_xblocks_34: + pid_offset = pid - num_xblocks_33 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp136 = tl.load(in_ptr34 + (0)) + tmp137 = tl.broadcast_to(tmp136, [XBLOCK]) + tmp138 = 1.0 + tmp139 = tmp137 + tmp138 + tl.store(out_ptr34 + (tl.full([XBLOCK], 0, tl.int32)), tmp139, None) + elif pid < num_xblocks_35: + pid_offset = pid - num_xblocks_34 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp140 = tl.load(in_ptr35 + (0)) + tmp141 = tl.broadcast_to(tmp140, [XBLOCK]) + tmp142 = 1.0 + tmp143 = tmp141 + tmp142 + tl.store(out_ptr35 + (tl.full([XBLOCK], 0, tl.int32)), tmp143, None) + elif pid < num_xblocks_36: + pid_offset = pid - num_xblocks_35 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp144 = tl.load(in_ptr36 + (0)) + tmp145 = tl.broadcast_to(tmp144, [XBLOCK]) + tmp146 = 1.0 + tmp147 = tmp145 + tmp146 + tl.store(out_ptr36 + (tl.full([XBLOCK], 0, tl.int32)), tmp147, None) + elif pid < num_xblocks_37: + pid_offset = pid - num_xblocks_36 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp148 = tl.load(in_ptr37 + (0)) + tmp149 = tl.broadcast_to(tmp148, [XBLOCK]) + tmp150 = 1.0 + 
tmp151 = tmp149 + tmp150 + tl.store(out_ptr37 + (tl.full([XBLOCK], 0, tl.int32)), tmp151, None) + elif pid < num_xblocks_38: + pid_offset = pid - num_xblocks_37 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp152 = tl.load(in_ptr38 + (0)) + tmp153 = tl.broadcast_to(tmp152, [XBLOCK]) + tmp154 = 1.0 + tmp155 = tmp153 + tmp154 + tl.store(out_ptr38 + (tl.full([XBLOCK], 0, tl.int32)), tmp155, None) + elif pid < num_xblocks_39: + pid_offset = pid - num_xblocks_38 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp156 = tl.load(in_ptr39 + (0)) + tmp157 = tl.broadcast_to(tmp156, [XBLOCK]) + tmp158 = 1.0 + tmp159 = tmp157 + tmp158 + tl.store(out_ptr39 + (tl.full([XBLOCK], 0, tl.int32)), tmp159, None) + elif pid < num_xblocks_40: + pid_offset = pid - num_xblocks_39 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp160 = tl.load(in_ptr40 + (0)) + tmp161 = tl.broadcast_to(tmp160, [XBLOCK]) + tmp162 = 1.0 + tmp163 = tmp161 + tmp162 + tl.store(out_ptr40 + (tl.full([XBLOCK], 0, tl.int32)), tmp163, None) + elif pid < num_xblocks_41: + pid_offset = pid - num_xblocks_40 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp164 = tl.load(in_ptr41 + (0)) + tmp165 = tl.broadcast_to(tmp164, [XBLOCK]) + tmp166 = 1.0 + tmp167 = tmp165 + tmp166 + tl.store(out_ptr41 + (tl.full([XBLOCK], 0, tl.int32)), tmp167, None) + elif pid < num_xblocks_42: + pid_offset = pid - num_xblocks_41 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp168 = tl.load(in_ptr42 + (0)) + tmp169 = tl.broadcast_to(tmp168, [XBLOCK]) + tmp170 = 1.0 + tmp171 = tmp169 + tmp170 + tl.store(out_ptr42 + (tl.full([XBLOCK], 0, tl.int32)), tmp171, None) + elif pid < num_xblocks_43: + pid_offset = pid - num_xblocks_42 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp172 = tl.load(in_ptr43 + (0)) + tmp173 = tl.broadcast_to(tmp172, [XBLOCK]) + tmp174 = 1.0 + tmp175 = tmp173 + tmp174 + tl.store(out_ptr43 + (tl.full([XBLOCK], 0, tl.int32)), tmp175, None) + elif pid < num_xblocks_44: + pid_offset = pid - num_xblocks_43 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp176 = tl.load(in_ptr44 + (0)) + tmp177 = tl.broadcast_to(tmp176, [XBLOCK]) + tmp178 = 1.0 + tmp179 = tmp177 + tmp178 + tl.store(out_ptr44 + (tl.full([XBLOCK], 0, tl.int32)), tmp179, None) + elif pid < num_xblocks_45: + pid_offset = pid - num_xblocks_44 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp180 = tl.load(in_ptr45 + (0)) + tmp181 = tl.broadcast_to(tmp180, [XBLOCK]) + tmp182 = 1.0 + tmp183 = tmp181 + tmp182 + tl.store(out_ptr45 + (tl.full([XBLOCK], 0, tl.int32)), tmp183, None) + elif pid < num_xblocks_46: + pid_offset = pid - num_xblocks_45 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp184 = 
tl.load(in_ptr46 + (0)) + tmp185 = tl.broadcast_to(tmp184, [XBLOCK]) + tmp186 = 1.0 + tmp187 = tmp185 + tmp186 + tl.store(out_ptr46 + (tl.full([XBLOCK], 0, tl.int32)), tmp187, None) + elif pid < num_xblocks_47: + pid_offset = pid - num_xblocks_46 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp188 = tl.load(in_ptr47 + (0)) + tmp189 = tl.broadcast_to(tmp188, [XBLOCK]) + tmp190 = 1.0 + tmp191 = tmp189 + tmp190 + tl.store(out_ptr47 + (tl.full([XBLOCK], 0, tl.int32)), tmp191, None) + elif pid < num_xblocks_48: + pid_offset = pid - num_xblocks_47 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp192 = tl.load(in_ptr48 + (0)) + tmp193 = tl.broadcast_to(tmp192, [XBLOCK]) + tmp194 = 1.0 + tmp195 = tmp193 + tmp194 + tl.store(out_ptr48 + (tl.full([XBLOCK], 0, tl.int32)), tmp195, None) + elif pid < num_xblocks_49: + pid_offset = pid - num_xblocks_48 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp196 = tl.load(in_ptr49 + (0)) + tmp197 = tl.broadcast_to(tmp196, [XBLOCK]) + tmp198 = 1.0 + tmp199 = tmp197 + tmp198 + tl.store(out_ptr49 + (tl.full([XBLOCK], 0, tl.int32)), tmp199, None) + elif pid < num_xblocks_50: + pid_offset = pid - num_xblocks_49 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp200 = tl.load(in_ptr50 + (0)) + tmp201 = tl.broadcast_to(tmp200, [XBLOCK]) + tmp202 = 1.0 + tmp203 = tmp201 + tmp202 + tl.store(out_ptr50 + (tl.full([XBLOCK], 0, tl.int32)), tmp203, None) + elif pid < num_xblocks_51: + pid_offset = pid - num_xblocks_50 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp204 = tl.load(in_ptr51 + (0)) + tmp205 = tl.broadcast_to(tmp204, [XBLOCK]) + tmp206 = 1.0 + tmp207 = tmp205 + tmp206 + tl.store(out_ptr51 + (tl.full([XBLOCK], 0, tl.int32)), tmp207, None) + elif pid < num_xblocks_52: + pid_offset = pid - num_xblocks_51 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp208 = tl.load(in_ptr52 + (0)) + tmp209 = tl.broadcast_to(tmp208, [XBLOCK]) + tmp210 = 1.0 + tmp211 = tmp209 + tmp210 + tl.store(out_ptr52 + (tl.full([XBLOCK], 0, tl.int32)), tmp211, None) + elif pid < num_xblocks_53: + pid_offset = pid - num_xblocks_52 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp212 = tl.load(in_ptr53 + (0)) + tmp213 = tl.broadcast_to(tmp212, [XBLOCK]) + tmp214 = 1.0 + tmp215 = tmp213 + tmp214 + tl.store(out_ptr53 + (tl.full([XBLOCK], 0, tl.int32)), tmp215, None) + elif pid < num_xblocks_54: + pid_offset = pid - num_xblocks_53 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp216 = tl.load(in_ptr54 + (0)) + tmp217 = tl.broadcast_to(tmp216, [XBLOCK]) + tmp218 = 1.0 + tmp219 = tmp217 + tmp218 + tl.store(out_ptr54 + (tl.full([XBLOCK], 0, tl.int32)), tmp219, None) + elif pid < num_xblocks_55: + pid_offset = pid - num_xblocks_54 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = 
xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp220 = tl.load(in_ptr55 + (0)) + tmp221 = tl.broadcast_to(tmp220, [XBLOCK]) + tmp222 = 1.0 + tmp223 = tmp221 + tmp222 + tl.store(out_ptr55 + (tl.full([XBLOCK], 0, tl.int32)), tmp223, None) + elif pid < num_xblocks_56: + pid_offset = pid - num_xblocks_55 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp224 = tl.load(in_ptr56 + (0)) + tmp225 = tl.broadcast_to(tmp224, [XBLOCK]) + tmp226 = 1.0 + tmp227 = tmp225 + tmp226 + tl.store(out_ptr56 + (tl.full([XBLOCK], 0, tl.int32)), tmp227, None) + elif pid < num_xblocks_57: + pid_offset = pid - num_xblocks_56 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp228 = tl.load(in_ptr57 + (0)) + tmp229 = tl.broadcast_to(tmp228, [XBLOCK]) + tmp230 = 1.0 + tmp231 = tmp229 + tmp230 + tl.store(out_ptr57 + (tl.full([XBLOCK], 0, tl.int32)), tmp231, None) + elif pid < num_xblocks_58: + pid_offset = pid - num_xblocks_57 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp232 = tl.load(in_ptr58 + (0)) + tmp233 = tl.broadcast_to(tmp232, [XBLOCK]) + tmp234 = 1.0 + tmp235 = tmp233 + tmp234 + tl.store(out_ptr58 + (tl.full([XBLOCK], 0, tl.int32)), tmp235, None) + elif pid < num_xblocks_59: + pid_offset = pid - num_xblocks_58 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp236 = tl.load(in_ptr59 + (0)) + tmp237 = tl.broadcast_to(tmp236, [XBLOCK]) + tmp238 = 1.0 + tmp239 = tmp237 + tmp238 + tl.store(out_ptr59 + (tl.full([XBLOCK], 0, tl.int32)), tmp239, None) + elif pid < num_xblocks_60: + pid_offset = pid - num_xblocks_59 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp240 = tl.load(in_ptr60 + (0)) + tmp241 = tl.broadcast_to(tmp240, [XBLOCK]) + tmp242 = 1.0 + tmp243 = tmp241 + tmp242 + tl.store(out_ptr60 + (tl.full([XBLOCK], 0, tl.int32)), tmp243, None) + elif pid < num_xblocks_61: + pid_offset = pid - num_xblocks_60 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp244 = tl.load(in_ptr61 + (0)) + tmp245 = tl.broadcast_to(tmp244, [XBLOCK]) + tmp246 = 1.0 + tmp247 = tmp245 + tmp246 + tl.store(out_ptr61 + (tl.full([XBLOCK], 0, tl.int32)), tmp247, None) + elif pid < num_xblocks_62: + pid_offset = pid - num_xblocks_61 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp248 = tl.load(in_ptr62 + (0)) + tmp249 = tl.broadcast_to(tmp248, [XBLOCK]) + tmp250 = 1.0 + tmp251 = tmp249 + tmp250 + tl.store(out_ptr62 + (tl.full([XBLOCK], 0, tl.int32)), tmp251, None) + elif pid < num_xblocks_63: + pid_offset = pid - num_xblocks_62 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp252 = tl.load(in_ptr63 + (0)) + tmp253 = tl.broadcast_to(tmp252, [XBLOCK]) + tmp254 = 1.0 + tmp255 = tmp253 + tmp254 + tl.store(out_ptr63 + (tl.full([XBLOCK], 0, tl.int32)), tmp255, None) + elif pid < num_xblocks_64: + pid_offset = pid - 
num_xblocks_63 + xnumel = 1 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + tmp256 = tl.load(in_ptr64 + (0)) + tmp257 = tl.broadcast_to(tmp256, [XBLOCK]) + tmp258 = 1.0 + tmp259 = tmp257 + tmp258 + tl.store(out_ptr64 + (tl.full([XBLOCK], 0, tl.int32)), tmp259, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/m5/cm5mdw5oawz5pdxxz24qte3du6nt7a74gsscrft2gcgexsa4agxx.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_2 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 
69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_2', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, 
out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(38633472, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(786432, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(1769472, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(2304, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(589824, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2359296, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(3072, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(2359296, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(1769472, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(2304, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(589824, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 38633472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 786432 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, 
tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + 
(x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = 
tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * 
tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + 
(x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * 
tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 + (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 2304 + 
rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/uc/cucov6bdfoahzje6orumwjboz53njy6qiq76hwevxfqpml6gkhy7.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_3 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 
13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_3', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 
'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2359296, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(3072, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(2359296, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(1769472, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2304, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(589824, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + 
tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2359296, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(3072, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(2359296, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + 
tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = 
tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + 
tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + 
tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 
/ tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), None) + tmp526 = tl.load(in_ptr76 + (x15), None) + tmp531 = tl.load(in_ptr77 + (x15), None) + tmp538 = tl.load(in_ptr78 + (x15), None) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 
* tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, None) + tl.store(out_ptr62 + (x15), tmp559, None) + tl.store(out_ptr63 + (x15), tmp537, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), None) + tmp596 = tl.load(in_ptr86 + (x17), None) + tmp601 = tl.load(in_ptr87 + (x17), None) + tmp608 = tl.load(in_ptr88 + (x17), None) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, None) + 
tl.store(out_ptr70 + (x17), tmp629, None) + tl.store(out_ptr71 + (x17), tmp607, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/bh/cbhrsnjoafi77bxs2ehpwzsnbhqnh3f6deudav2zfp3cry6aeyeg.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_4 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 
107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_4', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, 
in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(1769472, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2304, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(589824, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2359296, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(3072, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(2359296, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(1769472, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2304, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(589824, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + 
tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), None) + tmp71 = tl.load(in_ptr11 + (x2), None) + tmp76 = tl.load(in_ptr12 + (x2), None) + tmp83 = tl.load(in_ptr13 + (x2), None) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, None) + tl.store(out_ptr10 + (x2), tmp104, None) + tl.store(out_ptr11 + (x2), tmp82, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = 
tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + (x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = 
pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), None) + tmp281 = tl.load(in_ptr41 + (x8), None) + tmp286 = tl.load(in_ptr42 + (x8), None) + tmp293 = tl.load(in_ptr43 + (x8), None) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = 
tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, None) + tl.store(out_ptr34 + (x8), tmp314, None) + tl.store(out_ptr35 + (x8), tmp292, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 
+ (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = 
libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), None) + tmp491 = tl.load(in_ptr71 + (x14), None) + tmp496 = tl.load(in_ptr72 + (x14), None) + tmp503 = tl.load(in_ptr73 + (x14), None) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, None) + tl.store(out_ptr58 + (x14), tmp524, None) + tl.store(out_ptr59 + (x14), tmp502, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 
+ (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 
/ tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/st/cstrucu2uxk3ht2xjqayfn4a2bmj6fq5izilkcg3bjyjuhtnj5nc.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_5 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_5', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, 
out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2359296, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(3072, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(2359296, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(1769472, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2304, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(589824, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2359296, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(3072, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(2359296, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + 
tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - 
tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, 
xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), None) + tmp246 = tl.load(in_ptr36 + (x7), None) + tmp251 = tl.load(in_ptr37 + (x7), None) + tmp258 = tl.load(in_ptr38 + (x7), None) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, None) + tl.store(out_ptr30 + (x7), tmp279, None) + tl.store(out_ptr31 + (x7), tmp257, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + 
tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 
1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), None) + tmp456 = tl.load(in_ptr66 + (x13), None) + tmp461 = tl.load(in_ptr67 + (x13), None) + tmp468 = tl.load(in_ptr68 + (x13), None) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, None) + tl.store(out_ptr54 + (x13), tmp489, None) + tl.store(out_ptr55 + (x13), tmp467, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + 
tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), None) + tmp526 = tl.load(in_ptr76 + (x15), None) + tmp531 = tl.load(in_ptr77 + (x15), None) + tmp538 = tl.load(in_ptr78 + (x15), None) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, None) + tl.store(out_ptr62 + (x15), tmp559, None) + tl.store(out_ptr63 + (x15), tmp537, None) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + 
(x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/i7/ci7zf4ub54qutpo6uq7b2vzte3edxvvknb4h5fur6mesuyjgze47.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_6 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: 
'*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_6', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 
'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(1769472, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2304, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(589824, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(768, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2359296, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(3072, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(2359296, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(768, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(1769472, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2304, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(589824, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + 
tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(768, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2359296, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), None) + tmp71 = tl.load(in_ptr11 + (x2), None) + tmp76 = tl.load(in_ptr12 + (x2), None) + tmp83 = tl.load(in_ptr13 + (x2), None) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = 
libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, None) + tl.store(out_ptr10 + (x2), tmp104, None) + tl.store(out_ptr11 + (x2), tmp82, None) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), 
xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = 
tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), None) + tmp281 = tl.load(in_ptr41 + (x8), None) + tmp286 = tl.load(in_ptr42 + (x8), None) + tmp293 = tl.load(in_ptr43 + (x8), None) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, None) + tl.store(out_ptr34 + (x8), tmp314, None) + tl.store(out_ptr35 + (x8), tmp292, None) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 
+ tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, 
None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), None) + tmp491 = tl.load(in_ptr71 + (x14), None) + tmp496 = tl.load(in_ptr72 + (x14), None) + tmp503 = tl.load(in_ptr73 + (x14), None) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, None) + tl.store(out_ptr58 + (x14), tmp524, None) + tl.store(out_ptr59 + (x14), tmp502, None) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 
0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - 
num_xblocks_17 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/jn/cjnlfnxlu6k55o3ttjkcyltkwk6qkjiz7qsvfo3lxai6avs4elew.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_7 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 
116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_7', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, 
in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(3072, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(2359296, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(768, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(1769472, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2304, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(589824, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(768, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2359296, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(3072, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(2359296, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(768, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(1769472, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2304, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / 
tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), None) + tmp36 = tl.load(in_ptr6 + (x1), None) + tmp41 = tl.load(in_ptr7 + (x1), None) + tmp48 = tl.load(in_ptr8 + (x1), None) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, None) + tl.store(out_ptr6 + (x1), tmp69, None) + tl.store(out_ptr7 + (x1), tmp47, None) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = 
libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + 
tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), None) + tmp246 = tl.load(in_ptr36 + (x7), None) + tmp251 = tl.load(in_ptr37 + (x7), None) + tmp258 = tl.load(in_ptr38 + (x7), None) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, None) + tl.store(out_ptr30 + (x7), tmp279, None) + tl.store(out_ptr31 + (x7), tmp257, None) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 
= 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), 
None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), None) + tmp456 = tl.load(in_ptr66 + (x13), None) + tmp461 = tl.load(in_ptr67 + (x13), None) + tmp468 = tl.load(in_ptr68 + (x13), None) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + 
tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, None) + tl.store(out_ptr54 + (x13), tmp489, None) + tl.store(out_ptr55 + (x13), tmp467, None) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), xmask) + tmp561 = tl.load(in_ptr81 + (x16), xmask) + tmp566 = tl.load(in_ptr82 + (x16), xmask) + tmp573 = tl.load(in_ptr83 + (x16), xmask) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 
0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, xmask) + tl.store(out_ptr66 + (x16), tmp594, xmask) + tl.store(out_ptr67 + (x16), tmp572, xmask) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), None) + tmp596 = tl.load(in_ptr86 + (x17), None) + tmp601 = tl.load(in_ptr87 + (x17), None) + tmp608 = tl.load(in_ptr88 + (x17), None) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, None) + tl.store(out_ptr70 + (x17), tmp629, None) + tl.store(out_ptr71 + (x17), tmp607, None) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), xmask) + tmp631 = tl.load(in_ptr91 + (x18), xmask) + tmp636 = tl.load(in_ptr92 + (x18), xmask) + tmp643 = tl.load(in_ptr93 + (x18), xmask) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + 
(x18), tmp635, xmask) + tl.store(out_ptr74 + (x18), tmp664, xmask) + tl.store(out_ptr75 + (x18), tmp642, xmask) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/ot/cotx3jbpug3oh3qmmhribixmalckqjrqblcah6fbsskmomye67hg.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_8 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: '*fp32', 144: '*fp32', 145: '*fp32', 146: '*fp32', 147: '*fp32', 148: '*fp32', 149: '*fp32', 150: '*fp32', 151: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 
123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_8', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr75', 'in_ptr77', 'in_ptr78', 'in_ptr8', 'in_ptr80', 'in_ptr82', 'in_ptr83', 'in_ptr85', 'in_ptr87', 'in_ptr88', 'in_ptr90', 'in_ptr92', 'in_ptr93', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr60', 'out_ptr62', 'out_ptr63', 'out_ptr64', 'out_ptr66', 'out_ptr67', 'out_ptr68', 'out_ptr7', 'out_ptr70', 'out_ptr71', 'out_ptr72', 'out_ptr74', 'out_ptr75', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, 
out_ptr59, out_ptr60, out_ptr62, out_ptr63, out_ptr64, out_ptr66, out_ptr67, out_ptr68, out_ptr70, out_ptr71, out_ptr72, out_ptr74, out_ptr75): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(589824, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(768, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(2359296, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(3072, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(2359296, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(768, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(1769472, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2304, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(589824, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + num_xblocks_15 = num_xblocks_14 + tl.cdiv(768, XBLOCK) + num_xblocks_16 = num_xblocks_15 + tl.cdiv(2359296, XBLOCK) + num_xblocks_17 = num_xblocks_16 + tl.cdiv(3072, XBLOCK) + num_xblocks_18 = num_xblocks_17 + tl.cdiv(2359296, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), None) + tmp1 = tl.load(in_ptr1 + (x0), None) + tmp6 = tl.load(in_ptr2 + (x0), None) + tmp13 = tl.load(in_ptr3 + (x0), None) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, None) + tl.store(out_ptr2 + (x0), tmp34, None) + tl.store(out_ptr3 + (x0), tmp12, None) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 = xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = 
tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), xmask) + tmp106 = tl.load(in_ptr16 + (x3), xmask) + tmp111 = tl.load(in_ptr17 + (x3), xmask) + tmp118 = tl.load(in_ptr18 + (x3), xmask) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, xmask) + tl.store(out_ptr14 + (x3), tmp139, xmask) + tl.store(out_ptr15 + (x3), tmp117, xmask) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), None) + tmp141 = tl.load(in_ptr21 + (x4), None) + tmp146 = tl.load(in_ptr22 + (x4), None) + tmp153 = tl.load(in_ptr23 + (x4), None) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 
0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, None) + tl.store(out_ptr18 + (x4), tmp174, None) + tl.store(out_ptr19 + (x4), tmp152, None) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), xmask) + tmp176 = tl.load(in_ptr26 + (x5), xmask) + tmp181 = tl.load(in_ptr27 + (x5), xmask) + tmp188 = tl.load(in_ptr28 + (x5), xmask) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, xmask) + tl.store(out_ptr22 + (x5), tmp209, xmask) + tl.store(out_ptr23 + (x5), tmp187, xmask) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), None) + tmp211 = tl.load(in_ptr31 + (x6), None) + tmp216 = tl.load(in_ptr32 + (x6), None) + tmp223 = tl.load(in_ptr33 + (x6), None) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, None) + tl.store(out_ptr26 + (x6), tmp244, None) + tl.store(out_ptr27 + (x6), tmp222, None) 
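+ # Note on the branches above and below (tensor roles inferred from the
+ # arithmetic, not stated anywhere in the log): each `elif` repeats the same
+ # fused single-tensor Adam update on the next parameter tensor; only the
+ # pointer names, the element count (xnumel), and the temporary numbering
+ # change. The constants are consistent with beta1=0.9, beta2=0.999,
+ # eps=1e-8, and lr=0.01 (the 100.0 factor reads as 1/lr folded into the
+ # bias-corrected step size):
+ #   exp_avg    <- exp_avg + (grad - exp_avg) * (1 - beta1)
+ #   exp_avg_sq <- exp_avg_sq * beta2 + grad * grad * (1 - beta2)
+ #   param     <- param - lr * exp_avg /
+ #                ((1 - beta1**step) * (sqrt(exp_avg_sq) / sqrt(1 - beta2**step) + eps))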
+ elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), xmask) + tmp316 = tl.load(in_ptr46 + (x9), xmask) + tmp321 = tl.load(in_ptr47 + (x9), xmask) + tmp328 = tl.load(in_ptr48 + (x9), xmask) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) 
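+ # Inferred bias correction: tmp332 is beta2**step, so -(tmp332 - 1.0) just
+ # below recovers 1 - beta2**step; its square root rescales sqrt(exp_avg_sq)
+ # before eps is added to form the Adam denominator.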
+ tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, xmask) + tl.store(out_ptr38 + (x9), tmp349, xmask) + tl.store(out_ptr39 + (x9), tmp327, xmask) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), None) + tmp351 = tl.load(in_ptr51 + (x10), None) + tmp356 = tl.load(in_ptr52 + (x10), None) + tmp363 = tl.load(in_ptr53 + (x10), None) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, None) + tl.store(out_ptr42 + (x10), tmp384, None) + tl.store(out_ptr43 + (x10), tmp362, None) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), xmask) + tmp386 = tl.load(in_ptr56 + (x11), xmask) + tmp391 = tl.load(in_ptr57 + (x11), xmask) + tmp398 = tl.load(in_ptr58 + (x11), xmask) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, xmask) + tl.store(out_ptr46 + (x11), tmp419, xmask) + tl.store(out_ptr47 + (x11), tmp397, xmask) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], 
True, tl.int1) + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), None) + tmp421 = tl.load(in_ptr61 + (x12), None) + tmp426 = tl.load(in_ptr62 + (x12), None) + tmp433 = tl.load(in_ptr63 + (x12), None) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, None) + tl.store(out_ptr50 + (x12), tmp454, None) + tl.store(out_ptr51 + (x12), tmp432, None) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + 
tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + elif pid < num_xblocks_15: + pid_offset = pid - num_xblocks_14 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x15 = xindex + tmp525 = tl.load(in_ptr75 + (x15), xmask) + tmp526 = tl.load(in_ptr76 + (x15), xmask) + tmp531 = tl.load(in_ptr77 + (x15), xmask) + tmp538 = tl.load(in_ptr78 + (x15), xmask) + tmp540 = tl.load(in_ptr79 + (0)) + tmp541 = tl.broadcast_to(tmp540, [XBLOCK]) + tmp527 = tmp526 - tmp525 + tmp528 = 0.09999999999999998 + tmp529 = tmp527 * tmp528 + tmp530 = tmp525 + tmp529 + tmp532 = 0.999 + tmp533 = tmp531 * tmp532 + tmp534 = tmp526 * tmp526 + tmp535 = 0.0010000000000000009 + tmp536 = tmp534 * tmp535 + tmp537 = tmp533 + tmp536 + tmp539 = libdevice.sqrt(tmp537) + tmp542 = libdevice.pow(tmp532, tmp541) + tmp543 = 1.0 + tmp544 = tmp542 - tmp543 + tmp545 = -tmp544 + tmp546 = libdevice.sqrt(tmp545) + tmp547 = tmp539 / tmp546 + tmp548 = 1e-08 + tmp549 = tmp547 + tmp548 + tmp550 = 0.9 + tmp551 = libdevice.pow(tmp550, tmp541) + tmp552 = tmp551 - tmp543 + tmp553 = 100.0 + tmp554 = tmp552 * tmp553 + tmp555 = tl.full([1], 1, tl.int32) + tmp556 = tmp555 / tmp554 + tmp557 = tmp549 / tmp556 + tmp558 = tmp530 / tmp557 + tmp559 = tmp538 + tmp558 + tl.store(out_ptr60 + (x15), tmp530, xmask) + tl.store(out_ptr62 + (x15), tmp559, xmask) + tl.store(out_ptr63 + (x15), tmp537, xmask) + elif pid < num_xblocks_16: + pid_offset = pid - num_xblocks_15 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x16 = xindex + tmp560 = tl.load(in_ptr80 + (x16), None) + tmp561 = tl.load(in_ptr81 + (x16), None) + tmp566 = tl.load(in_ptr82 + (x16), None) + tmp573 = tl.load(in_ptr83 + (x16), None) + tmp575 = tl.load(in_ptr84 + (0)) + tmp576 = tl.broadcast_to(tmp575, [XBLOCK]) + tmp562 = tmp561 - tmp560 + tmp563 = 0.09999999999999998 + tmp564 = tmp562 * tmp563 + tmp565 = tmp560 + tmp564 + tmp567 = 0.999 + tmp568 = tmp566 * tmp567 + tmp569 = tmp561 * tmp561 + tmp570 = 0.0010000000000000009 + tmp571 = tmp569 * tmp570 + tmp572 = tmp568 + tmp571 + tmp574 = libdevice.sqrt(tmp572) + tmp577 = libdevice.pow(tmp567, tmp576) + tmp578 = 1.0 + tmp579 = tmp577 - tmp578 + tmp580 = -tmp579 + tmp581 = libdevice.sqrt(tmp580) + tmp582 = tmp574 / tmp581 + tmp583 = 1e-08 + tmp584 = tmp582 + tmp583 + tmp585 = 0.9 + tmp586 = libdevice.pow(tmp585, tmp576) + tmp587 = tmp586 - tmp578 + tmp588 = 100.0 + tmp589 = tmp587 * tmp588 + tmp590 = tl.full([1], 1, tl.int32) + tmp591 = tmp590 / tmp589 + tmp592 = tmp584 / tmp591 + tmp593 = tmp565 / tmp592 + tmp594 = tmp573 + tmp593 + tl.store(out_ptr64 + (x16), tmp565, None) + tl.store(out_ptr66 + (x16), tmp594, None) + tl.store(out_ptr67 + (x16), tmp572, None) + elif pid < num_xblocks_17: + pid_offset = pid - num_xblocks_16 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x17 = xindex + tmp595 = tl.load(in_ptr85 + (x17), xmask) + tmp596 = tl.load(in_ptr86 + (x17), xmask) + tmp601 = tl.load(in_ptr87 + (x17), xmask) + tmp608 = 
tl.load(in_ptr88 + (x17), xmask) + tmp610 = tl.load(in_ptr89 + (0)) + tmp611 = tl.broadcast_to(tmp610, [XBLOCK]) + tmp597 = tmp596 - tmp595 + tmp598 = 0.09999999999999998 + tmp599 = tmp597 * tmp598 + tmp600 = tmp595 + tmp599 + tmp602 = 0.999 + tmp603 = tmp601 * tmp602 + tmp604 = tmp596 * tmp596 + tmp605 = 0.0010000000000000009 + tmp606 = tmp604 * tmp605 + tmp607 = tmp603 + tmp606 + tmp609 = libdevice.sqrt(tmp607) + tmp612 = libdevice.pow(tmp602, tmp611) + tmp613 = 1.0 + tmp614 = tmp612 - tmp613 + tmp615 = -tmp614 + tmp616 = libdevice.sqrt(tmp615) + tmp617 = tmp609 / tmp616 + tmp618 = 1e-08 + tmp619 = tmp617 + tmp618 + tmp620 = 0.9 + tmp621 = libdevice.pow(tmp620, tmp611) + tmp622 = tmp621 - tmp613 + tmp623 = 100.0 + tmp624 = tmp622 * tmp623 + tmp625 = tl.full([1], 1, tl.int32) + tmp626 = tmp625 / tmp624 + tmp627 = tmp619 / tmp626 + tmp628 = tmp600 / tmp627 + tmp629 = tmp608 + tmp628 + tl.store(out_ptr68 + (x17), tmp600, xmask) + tl.store(out_ptr70 + (x17), tmp629, xmask) + tl.store(out_ptr71 + (x17), tmp607, xmask) + elif pid < num_xblocks_18: + pid_offset = pid - num_xblocks_17 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x18 = xindex + tmp630 = tl.load(in_ptr90 + (x18), None) + tmp631 = tl.load(in_ptr91 + (x18), None) + tmp636 = tl.load(in_ptr92 + (x18), None) + tmp643 = tl.load(in_ptr93 + (x18), None) + tmp645 = tl.load(in_ptr94 + (0)) + tmp646 = tl.broadcast_to(tmp645, [XBLOCK]) + tmp632 = tmp631 - tmp630 + tmp633 = 0.09999999999999998 + tmp634 = tmp632 * tmp633 + tmp635 = tmp630 + tmp634 + tmp637 = 0.999 + tmp638 = tmp636 * tmp637 + tmp639 = tmp631 * tmp631 + tmp640 = 0.0010000000000000009 + tmp641 = tmp639 * tmp640 + tmp642 = tmp638 + tmp641 + tmp644 = libdevice.sqrt(tmp642) + tmp647 = libdevice.pow(tmp637, tmp646) + tmp648 = 1.0 + tmp649 = tmp647 - tmp648 + tmp650 = -tmp649 + tmp651 = libdevice.sqrt(tmp650) + tmp652 = tmp644 / tmp651 + tmp653 = 1e-08 + tmp654 = tmp652 + tmp653 + tmp655 = 0.9 + tmp656 = libdevice.pow(tmp655, tmp646) + tmp657 = tmp656 - tmp648 + tmp658 = 100.0 + tmp659 = tmp657 * tmp658 + tmp660 = tl.full([1], 1, tl.int32) + tmp661 = tmp660 / tmp659 + tmp662 = tmp654 / tmp661 + tmp663 = tmp635 / tmp662 + tmp664 = tmp643 + tmp663 + tl.store(out_ptr72 + (x18), tmp635, None) + tl.store(out_ptr74 + (x18), tmp664, None) + tl.store(out_ptr75 + (x18), tmp642, None) + else: + pass + ''', device_str='cuda') + + + # kernel path: /tmp/tmp2ln889l5/4r/c4rqbae7wfx3e3mqatzcxcyp2mum6rfdbywgpqy7ptg44uaonbzr.py + # Source Nodes: [], Original ATen: [] + + triton_for_fused_9 = async_compile.triton('triton_', ''' + import triton + import triton.language as tl + from triton.compiler.compiler import AttrsDescriptor + + from torch._inductor.runtime import triton_helpers, triton_heuristics + from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math + from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, instance_descriptor, DeviceProperties + + @triton_heuristics.foreach( + num_warps=8, + triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 
34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119), equal_to_1=())]}, + inductor_meta={'kernel_name': 'triton_for_fused_9', 'mutated_arg_names': ['in_ptr0', 'in_ptr10', 'in_ptr12', 'in_ptr13', 'in_ptr15', 'in_ptr17', 'in_ptr18', 'in_ptr2', 'in_ptr20', 'in_ptr22', 'in_ptr23', 'in_ptr25', 'in_ptr27', 'in_ptr28', 'in_ptr3', 'in_ptr30', 'in_ptr32', 'in_ptr33', 'in_ptr35', 'in_ptr37', 'in_ptr38', 'in_ptr40', 'in_ptr42', 'in_ptr43', 'in_ptr45', 'in_ptr47', 'in_ptr48', 'in_ptr5', 'in_ptr50', 'in_ptr52', 'in_ptr53', 'in_ptr55', 'in_ptr57', 'in_ptr58', 'in_ptr60', 'in_ptr62', 'in_ptr63', 'in_ptr65', 'in_ptr67', 'in_ptr68', 'in_ptr7', 'in_ptr70', 'in_ptr72', 'in_ptr73', 'in_ptr8', 'out_ptr0', 'out_ptr10', 'out_ptr11', 'out_ptr12', 'out_ptr14', 'out_ptr15', 'out_ptr16', 'out_ptr18', 'out_ptr19', 'out_ptr2', 'out_ptr20', 'out_ptr22', 'out_ptr23', 'out_ptr24', 'out_ptr26', 'out_ptr27', 'out_ptr28', 'out_ptr3', 'out_ptr30', 'out_ptr31', 'out_ptr32', 'out_ptr34', 'out_ptr35', 'out_ptr36', 'out_ptr38', 'out_ptr39', 'out_ptr4', 'out_ptr40', 'out_ptr42', 'out_ptr43', 'out_ptr44', 'out_ptr46', 'out_ptr47', 'out_ptr48', 'out_ptr50', 'out_ptr51', 'out_ptr52', 'out_ptr54', 'out_ptr55', 'out_ptr56', 'out_ptr58', 'out_ptr59', 'out_ptr6', 'out_ptr7', 'out_ptr8'], 'backend_hash': 'B0714AF1B5CA55D285C0AA74C7669268EFE45FDD9B7A0A70183B8D89AC8FCAC4', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False}, + ) + @triton.jit + def triton_(in_ptr0, 
in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, out_ptr0, out_ptr2, out_ptr3, out_ptr4, out_ptr6, out_ptr7, out_ptr8, out_ptr10, out_ptr11, out_ptr12, out_ptr14, out_ptr15, out_ptr16, out_ptr18, out_ptr19, out_ptr20, out_ptr22, out_ptr23, out_ptr24, out_ptr26, out_ptr27, out_ptr28, out_ptr30, out_ptr31, out_ptr32, out_ptr34, out_ptr35, out_ptr36, out_ptr38, out_ptr39, out_ptr40, out_ptr42, out_ptr43, out_ptr44, out_ptr46, out_ptr47, out_ptr48, out_ptr50, out_ptr51, out_ptr52, out_ptr54, out_ptr55, out_ptr56, out_ptr58, out_ptr59): + pid = tl.program_id(0) + XBLOCK: tl.constexpr = 1024 + num_xblocks_0 = tl.cdiv(768, XBLOCK) + num_xblocks_1 = num_xblocks_0 + tl.cdiv(768, XBLOCK) + num_xblocks_2 = num_xblocks_1 + tl.cdiv(768, XBLOCK) + num_xblocks_3 = num_xblocks_2 + tl.cdiv(1769472, XBLOCK) + num_xblocks_4 = num_xblocks_3 + tl.cdiv(2304, XBLOCK) + num_xblocks_5 = num_xblocks_4 + tl.cdiv(589824, XBLOCK) + num_xblocks_6 = num_xblocks_5 + tl.cdiv(768, XBLOCK) + num_xblocks_7 = num_xblocks_6 + tl.cdiv(768, XBLOCK) + num_xblocks_8 = num_xblocks_7 + tl.cdiv(768, XBLOCK) + num_xblocks_9 = num_xblocks_8 + tl.cdiv(2359296, XBLOCK) + num_xblocks_10 = num_xblocks_9 + tl.cdiv(3072, XBLOCK) + num_xblocks_11 = num_xblocks_10 + tl.cdiv(2359296, XBLOCK) + num_xblocks_12 = num_xblocks_11 + tl.cdiv(768, XBLOCK) + num_xblocks_13 = num_xblocks_12 + tl.cdiv(768, XBLOCK) + num_xblocks_14 = num_xblocks_13 + tl.cdiv(768, XBLOCK) + if pid < num_xblocks_0: + pid_offset = pid + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x0 = xindex + tmp0 = tl.load(in_ptr0 + (x0), xmask) + tmp1 = tl.load(in_ptr1 + (x0), xmask) + tmp6 = tl.load(in_ptr2 + (x0), xmask) + tmp13 = tl.load(in_ptr3 + (x0), xmask) + tmp15 = tl.load(in_ptr4 + (0)) + tmp16 = tl.broadcast_to(tmp15, [XBLOCK]) + tmp2 = tmp1 - tmp0 + tmp3 = 0.09999999999999998 + tmp4 = tmp2 * tmp3 + tmp5 = tmp0 + tmp4 + tmp7 = 0.999 + tmp8 = tmp6 * tmp7 + tmp9 = tmp1 * tmp1 + tmp10 = 0.0010000000000000009 + tmp11 = tmp9 * tmp10 + tmp12 = tmp8 + tmp11 + tmp14 = libdevice.sqrt(tmp12) + tmp17 = libdevice.pow(tmp7, tmp16) + tmp18 = 1.0 + tmp19 = tmp17 - tmp18 + tmp20 = -tmp19 + tmp21 = libdevice.sqrt(tmp20) + tmp22 = tmp14 / tmp21 + tmp23 = 1e-08 + tmp24 = tmp22 + tmp23 + tmp25 = 0.9 + tmp26 = libdevice.pow(tmp25, tmp16) + tmp27 = tmp26 - tmp18 + tmp28 = 100.0 + tmp29 = tmp27 * tmp28 + tmp30 = tl.full([1], 1, tl.int32) + tmp31 = tmp30 / tmp29 + tmp32 = tmp24 / tmp31 + tmp33 = tmp5 / tmp32 + tmp34 = tmp13 + tmp33 + tl.store(out_ptr0 + (x0), tmp5, xmask) + tl.store(out_ptr2 + (x0), tmp34, xmask) + tl.store(out_ptr3 + (x0), tmp12, xmask) + elif pid < num_xblocks_1: + pid_offset = pid - num_xblocks_0 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x1 
= xindex + tmp35 = tl.load(in_ptr5 + (x1), xmask) + tmp36 = tl.load(in_ptr6 + (x1), xmask) + tmp41 = tl.load(in_ptr7 + (x1), xmask) + tmp48 = tl.load(in_ptr8 + (x1), xmask) + tmp50 = tl.load(in_ptr9 + (0)) + tmp51 = tl.broadcast_to(tmp50, [XBLOCK]) + tmp37 = tmp36 - tmp35 + tmp38 = 0.09999999999999998 + tmp39 = tmp37 * tmp38 + tmp40 = tmp35 + tmp39 + tmp42 = 0.999 + tmp43 = tmp41 * tmp42 + tmp44 = tmp36 * tmp36 + tmp45 = 0.0010000000000000009 + tmp46 = tmp44 * tmp45 + tmp47 = tmp43 + tmp46 + tmp49 = libdevice.sqrt(tmp47) + tmp52 = libdevice.pow(tmp42, tmp51) + tmp53 = 1.0 + tmp54 = tmp52 - tmp53 + tmp55 = -tmp54 + tmp56 = libdevice.sqrt(tmp55) + tmp57 = tmp49 / tmp56 + tmp58 = 1e-08 + tmp59 = tmp57 + tmp58 + tmp60 = 0.9 + tmp61 = libdevice.pow(tmp60, tmp51) + tmp62 = tmp61 - tmp53 + tmp63 = 100.0 + tmp64 = tmp62 * tmp63 + tmp65 = tl.full([1], 1, tl.int32) + tmp66 = tmp65 / tmp64 + tmp67 = tmp59 / tmp66 + tmp68 = tmp40 / tmp67 + tmp69 = tmp48 + tmp68 + tl.store(out_ptr4 + (x1), tmp40, xmask) + tl.store(out_ptr6 + (x1), tmp69, xmask) + tl.store(out_ptr7 + (x1), tmp47, xmask) + elif pid < num_xblocks_2: + pid_offset = pid - num_xblocks_1 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x2 = xindex + tmp70 = tl.load(in_ptr10 + (x2), xmask) + tmp71 = tl.load(in_ptr11 + (x2), xmask) + tmp76 = tl.load(in_ptr12 + (x2), xmask) + tmp83 = tl.load(in_ptr13 + (x2), xmask) + tmp85 = tl.load(in_ptr14 + (0)) + tmp86 = tl.broadcast_to(tmp85, [XBLOCK]) + tmp72 = tmp71 - tmp70 + tmp73 = 0.09999999999999998 + tmp74 = tmp72 * tmp73 + tmp75 = tmp70 + tmp74 + tmp77 = 0.999 + tmp78 = tmp76 * tmp77 + tmp79 = tmp71 * tmp71 + tmp80 = 0.0010000000000000009 + tmp81 = tmp79 * tmp80 + tmp82 = tmp78 + tmp81 + tmp84 = libdevice.sqrt(tmp82) + tmp87 = libdevice.pow(tmp77, tmp86) + tmp88 = 1.0 + tmp89 = tmp87 - tmp88 + tmp90 = -tmp89 + tmp91 = libdevice.sqrt(tmp90) + tmp92 = tmp84 / tmp91 + tmp93 = 1e-08 + tmp94 = tmp92 + tmp93 + tmp95 = 0.9 + tmp96 = libdevice.pow(tmp95, tmp86) + tmp97 = tmp96 - tmp88 + tmp98 = 100.0 + tmp99 = tmp97 * tmp98 + tmp100 = tl.full([1], 1, tl.int32) + tmp101 = tmp100 / tmp99 + tmp102 = tmp94 / tmp101 + tmp103 = tmp75 / tmp102 + tmp104 = tmp83 + tmp103 + tl.store(out_ptr8 + (x2), tmp75, xmask) + tl.store(out_ptr10 + (x2), tmp104, xmask) + tl.store(out_ptr11 + (x2), tmp82, xmask) + elif pid < num_xblocks_3: + pid_offset = pid - num_xblocks_2 + xnumel = 1769472 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x3 = xindex + tmp105 = tl.load(in_ptr15 + (x3), None) + tmp106 = tl.load(in_ptr16 + (x3), None) + tmp111 = tl.load(in_ptr17 + (x3), None) + tmp118 = tl.load(in_ptr18 + (x3), None) + tmp120 = tl.load(in_ptr19 + (0)) + tmp121 = tl.broadcast_to(tmp120, [XBLOCK]) + tmp107 = tmp106 - tmp105 + tmp108 = 0.09999999999999998 + tmp109 = tmp107 * tmp108 + tmp110 = tmp105 + tmp109 + tmp112 = 0.999 + tmp113 = tmp111 * tmp112 + tmp114 = tmp106 * tmp106 + tmp115 = 0.0010000000000000009 + tmp116 = tmp114 * tmp115 + tmp117 = tmp113 + tmp116 + tmp119 = libdevice.sqrt(tmp117) + tmp122 = libdevice.pow(tmp112, tmp121) + tmp123 = 1.0 + tmp124 = tmp122 - tmp123 + tmp125 = -tmp124 + tmp126 = libdevice.sqrt(tmp125) + tmp127 = tmp119 / tmp126 + tmp128 = 1e-08 + tmp129 = tmp127 + tmp128 + tmp130 = 0.9 + tmp131 = libdevice.pow(tmp130, tmp121) + tmp132 = tmp131 - tmp123 + tmp133 = 100.0 + tmp134 = tmp132 * tmp133 + tmp135 = tl.full([1], 1, tl.int32) + 
tmp136 = tmp135 / tmp134 + tmp137 = tmp129 / tmp136 + tmp138 = tmp110 / tmp137 + tmp139 = tmp118 + tmp138 + tl.store(out_ptr12 + (x3), tmp110, None) + tl.store(out_ptr14 + (x3), tmp139, None) + tl.store(out_ptr15 + (x3), tmp117, None) + elif pid < num_xblocks_4: + pid_offset = pid - num_xblocks_3 + xnumel = 2304 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x4 = xindex + tmp140 = tl.load(in_ptr20 + (x4), xmask) + tmp141 = tl.load(in_ptr21 + (x4), xmask) + tmp146 = tl.load(in_ptr22 + (x4), xmask) + tmp153 = tl.load(in_ptr23 + (x4), xmask) + tmp155 = tl.load(in_ptr24 + (0)) + tmp156 = tl.broadcast_to(tmp155, [XBLOCK]) + tmp142 = tmp141 - tmp140 + tmp143 = 0.09999999999999998 + tmp144 = tmp142 * tmp143 + tmp145 = tmp140 + tmp144 + tmp147 = 0.999 + tmp148 = tmp146 * tmp147 + tmp149 = tmp141 * tmp141 + tmp150 = 0.0010000000000000009 + tmp151 = tmp149 * tmp150 + tmp152 = tmp148 + tmp151 + tmp154 = libdevice.sqrt(tmp152) + tmp157 = libdevice.pow(tmp147, tmp156) + tmp158 = 1.0 + tmp159 = tmp157 - tmp158 + tmp160 = -tmp159 + tmp161 = libdevice.sqrt(tmp160) + tmp162 = tmp154 / tmp161 + tmp163 = 1e-08 + tmp164 = tmp162 + tmp163 + tmp165 = 0.9 + tmp166 = libdevice.pow(tmp165, tmp156) + tmp167 = tmp166 - tmp158 + tmp168 = 100.0 + tmp169 = tmp167 * tmp168 + tmp170 = tl.full([1], 1, tl.int32) + tmp171 = tmp170 / tmp169 + tmp172 = tmp164 / tmp171 + tmp173 = tmp145 / tmp172 + tmp174 = tmp153 + tmp173 + tl.store(out_ptr16 + (x4), tmp145, xmask) + tl.store(out_ptr18 + (x4), tmp174, xmask) + tl.store(out_ptr19 + (x4), tmp152, xmask) + elif pid < num_xblocks_5: + pid_offset = pid - num_xblocks_4 + xnumel = 589824 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x5 = xindex + tmp175 = tl.load(in_ptr25 + (x5), None) + tmp176 = tl.load(in_ptr26 + (x5), None) + tmp181 = tl.load(in_ptr27 + (x5), None) + tmp188 = tl.load(in_ptr28 + (x5), None) + tmp190 = tl.load(in_ptr29 + (0)) + tmp191 = tl.broadcast_to(tmp190, [XBLOCK]) + tmp177 = tmp176 - tmp175 + tmp178 = 0.09999999999999998 + tmp179 = tmp177 * tmp178 + tmp180 = tmp175 + tmp179 + tmp182 = 0.999 + tmp183 = tmp181 * tmp182 + tmp184 = tmp176 * tmp176 + tmp185 = 0.0010000000000000009 + tmp186 = tmp184 * tmp185 + tmp187 = tmp183 + tmp186 + tmp189 = libdevice.sqrt(tmp187) + tmp192 = libdevice.pow(tmp182, tmp191) + tmp193 = 1.0 + tmp194 = tmp192 - tmp193 + tmp195 = -tmp194 + tmp196 = libdevice.sqrt(tmp195) + tmp197 = tmp189 / tmp196 + tmp198 = 1e-08 + tmp199 = tmp197 + tmp198 + tmp200 = 0.9 + tmp201 = libdevice.pow(tmp200, tmp191) + tmp202 = tmp201 - tmp193 + tmp203 = 100.0 + tmp204 = tmp202 * tmp203 + tmp205 = tl.full([1], 1, tl.int32) + tmp206 = tmp205 / tmp204 + tmp207 = tmp199 / tmp206 + tmp208 = tmp180 / tmp207 + tmp209 = tmp188 + tmp208 + tl.store(out_ptr20 + (x5), tmp180, None) + tl.store(out_ptr22 + (x5), tmp209, None) + tl.store(out_ptr23 + (x5), tmp187, None) + elif pid < num_xblocks_6: + pid_offset = pid - num_xblocks_5 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x6 = xindex + tmp210 = tl.load(in_ptr30 + (x6), xmask) + tmp211 = tl.load(in_ptr31 + (x6), xmask) + tmp216 = tl.load(in_ptr32 + (x6), xmask) + tmp223 = tl.load(in_ptr33 + (x6), xmask) + tmp225 = tl.load(in_ptr34 + (0)) + tmp226 = tl.broadcast_to(tmp225, [XBLOCK]) + tmp212 = tmp211 - tmp210 + tmp213 = 0.09999999999999998 + tmp214 = tmp212 * tmp213 + tmp215 = 
tmp210 + tmp214 + tmp217 = 0.999 + tmp218 = tmp216 * tmp217 + tmp219 = tmp211 * tmp211 + tmp220 = 0.0010000000000000009 + tmp221 = tmp219 * tmp220 + tmp222 = tmp218 + tmp221 + tmp224 = libdevice.sqrt(tmp222) + tmp227 = libdevice.pow(tmp217, tmp226) + tmp228 = 1.0 + tmp229 = tmp227 - tmp228 + tmp230 = -tmp229 + tmp231 = libdevice.sqrt(tmp230) + tmp232 = tmp224 / tmp231 + tmp233 = 1e-08 + tmp234 = tmp232 + tmp233 + tmp235 = 0.9 + tmp236 = libdevice.pow(tmp235, tmp226) + tmp237 = tmp236 - tmp228 + tmp238 = 100.0 + tmp239 = tmp237 * tmp238 + tmp240 = tl.full([1], 1, tl.int32) + tmp241 = tmp240 / tmp239 + tmp242 = tmp234 / tmp241 + tmp243 = tmp215 / tmp242 + tmp244 = tmp223 + tmp243 + tl.store(out_ptr24 + (x6), tmp215, xmask) + tl.store(out_ptr26 + (x6), tmp244, xmask) + tl.store(out_ptr27 + (x6), tmp222, xmask) + elif pid < num_xblocks_7: + pid_offset = pid - num_xblocks_6 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x7 = xindex + tmp245 = tl.load(in_ptr35 + (x7), xmask) + tmp246 = tl.load(in_ptr36 + (x7), xmask) + tmp251 = tl.load(in_ptr37 + (x7), xmask) + tmp258 = tl.load(in_ptr38 + (x7), xmask) + tmp260 = tl.load(in_ptr39 + (0)) + tmp261 = tl.broadcast_to(tmp260, [XBLOCK]) + tmp247 = tmp246 - tmp245 + tmp248 = 0.09999999999999998 + tmp249 = tmp247 * tmp248 + tmp250 = tmp245 + tmp249 + tmp252 = 0.999 + tmp253 = tmp251 * tmp252 + tmp254 = tmp246 * tmp246 + tmp255 = 0.0010000000000000009 + tmp256 = tmp254 * tmp255 + tmp257 = tmp253 + tmp256 + tmp259 = libdevice.sqrt(tmp257) + tmp262 = libdevice.pow(tmp252, tmp261) + tmp263 = 1.0 + tmp264 = tmp262 - tmp263 + tmp265 = -tmp264 + tmp266 = libdevice.sqrt(tmp265) + tmp267 = tmp259 / tmp266 + tmp268 = 1e-08 + tmp269 = tmp267 + tmp268 + tmp270 = 0.9 + tmp271 = libdevice.pow(tmp270, tmp261) + tmp272 = tmp271 - tmp263 + tmp273 = 100.0 + tmp274 = tmp272 * tmp273 + tmp275 = tl.full([1], 1, tl.int32) + tmp276 = tmp275 / tmp274 + tmp277 = tmp269 / tmp276 + tmp278 = tmp250 / tmp277 + tmp279 = tmp258 + tmp278 + tl.store(out_ptr28 + (x7), tmp250, xmask) + tl.store(out_ptr30 + (x7), tmp279, xmask) + tl.store(out_ptr31 + (x7), tmp257, xmask) + elif pid < num_xblocks_8: + pid_offset = pid - num_xblocks_7 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x8 = xindex + tmp280 = tl.load(in_ptr40 + (x8), xmask) + tmp281 = tl.load(in_ptr41 + (x8), xmask) + tmp286 = tl.load(in_ptr42 + (x8), xmask) + tmp293 = tl.load(in_ptr43 + (x8), xmask) + tmp295 = tl.load(in_ptr44 + (0)) + tmp296 = tl.broadcast_to(tmp295, [XBLOCK]) + tmp282 = tmp281 - tmp280 + tmp283 = 0.09999999999999998 + tmp284 = tmp282 * tmp283 + tmp285 = tmp280 + tmp284 + tmp287 = 0.999 + tmp288 = tmp286 * tmp287 + tmp289 = tmp281 * tmp281 + tmp290 = 0.0010000000000000009 + tmp291 = tmp289 * tmp290 + tmp292 = tmp288 + tmp291 + tmp294 = libdevice.sqrt(tmp292) + tmp297 = libdevice.pow(tmp287, tmp296) + tmp298 = 1.0 + tmp299 = tmp297 - tmp298 + tmp300 = -tmp299 + tmp301 = libdevice.sqrt(tmp300) + tmp302 = tmp294 / tmp301 + tmp303 = 1e-08 + tmp304 = tmp302 + tmp303 + tmp305 = 0.9 + tmp306 = libdevice.pow(tmp305, tmp296) + tmp307 = tmp306 - tmp298 + tmp308 = 100.0 + tmp309 = tmp307 * tmp308 + tmp310 = tl.full([1], 1, tl.int32) + tmp311 = tmp310 / tmp309 + tmp312 = tmp304 / tmp311 + tmp313 = tmp285 / tmp312 + tmp314 = tmp293 + tmp313 + tl.store(out_ptr32 + (x8), tmp285, xmask) + tl.store(out_ptr34 + (x8), tmp314, xmask) + tl.store(out_ptr35 + (x8), 
tmp292, xmask) + elif pid < num_xblocks_9: + pid_offset = pid - num_xblocks_8 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x9 = xindex + tmp315 = tl.load(in_ptr45 + (x9), None) + tmp316 = tl.load(in_ptr46 + (x9), None) + tmp321 = tl.load(in_ptr47 + (x9), None) + tmp328 = tl.load(in_ptr48 + (x9), None) + tmp330 = tl.load(in_ptr49 + (0)) + tmp331 = tl.broadcast_to(tmp330, [XBLOCK]) + tmp317 = tmp316 - tmp315 + tmp318 = 0.09999999999999998 + tmp319 = tmp317 * tmp318 + tmp320 = tmp315 + tmp319 + tmp322 = 0.999 + tmp323 = tmp321 * tmp322 + tmp324 = tmp316 * tmp316 + tmp325 = 0.0010000000000000009 + tmp326 = tmp324 * tmp325 + tmp327 = tmp323 + tmp326 + tmp329 = libdevice.sqrt(tmp327) + tmp332 = libdevice.pow(tmp322, tmp331) + tmp333 = 1.0 + tmp334 = tmp332 - tmp333 + tmp335 = -tmp334 + tmp336 = libdevice.sqrt(tmp335) + tmp337 = tmp329 / tmp336 + tmp338 = 1e-08 + tmp339 = tmp337 + tmp338 + tmp340 = 0.9 + tmp341 = libdevice.pow(tmp340, tmp331) + tmp342 = tmp341 - tmp333 + tmp343 = 100.0 + tmp344 = tmp342 * tmp343 + tmp345 = tl.full([1], 1, tl.int32) + tmp346 = tmp345 / tmp344 + tmp347 = tmp339 / tmp346 + tmp348 = tmp320 / tmp347 + tmp349 = tmp328 + tmp348 + tl.store(out_ptr36 + (x9), tmp320, None) + tl.store(out_ptr38 + (x9), tmp349, None) + tl.store(out_ptr39 + (x9), tmp327, None) + elif pid < num_xblocks_10: + pid_offset = pid - num_xblocks_9 + xnumel = 3072 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x10 = xindex + tmp350 = tl.load(in_ptr50 + (x10), xmask) + tmp351 = tl.load(in_ptr51 + (x10), xmask) + tmp356 = tl.load(in_ptr52 + (x10), xmask) + tmp363 = tl.load(in_ptr53 + (x10), xmask) + tmp365 = tl.load(in_ptr54 + (0)) + tmp366 = tl.broadcast_to(tmp365, [XBLOCK]) + tmp352 = tmp351 - tmp350 + tmp353 = 0.09999999999999998 + tmp354 = tmp352 * tmp353 + tmp355 = tmp350 + tmp354 + tmp357 = 0.999 + tmp358 = tmp356 * tmp357 + tmp359 = tmp351 * tmp351 + tmp360 = 0.0010000000000000009 + tmp361 = tmp359 * tmp360 + tmp362 = tmp358 + tmp361 + tmp364 = libdevice.sqrt(tmp362) + tmp367 = libdevice.pow(tmp357, tmp366) + tmp368 = 1.0 + tmp369 = tmp367 - tmp368 + tmp370 = -tmp369 + tmp371 = libdevice.sqrt(tmp370) + tmp372 = tmp364 / tmp371 + tmp373 = 1e-08 + tmp374 = tmp372 + tmp373 + tmp375 = 0.9 + tmp376 = libdevice.pow(tmp375, tmp366) + tmp377 = tmp376 - tmp368 + tmp378 = 100.0 + tmp379 = tmp377 * tmp378 + tmp380 = tl.full([1], 1, tl.int32) + tmp381 = tmp380 / tmp379 + tmp382 = tmp374 / tmp381 + tmp383 = tmp355 / tmp382 + tmp384 = tmp363 + tmp383 + tl.store(out_ptr40 + (x10), tmp355, xmask) + tl.store(out_ptr42 + (x10), tmp384, xmask) + tl.store(out_ptr43 + (x10), tmp362, xmask) + elif pid < num_xblocks_11: + pid_offset = pid - num_xblocks_10 + xnumel = 2359296 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = tl.full([XBLOCK], True, tl.int1) + x11 = xindex + tmp385 = tl.load(in_ptr55 + (x11), None) + tmp386 = tl.load(in_ptr56 + (x11), None) + tmp391 = tl.load(in_ptr57 + (x11), None) + tmp398 = tl.load(in_ptr58 + (x11), None) + tmp400 = tl.load(in_ptr59 + (0)) + tmp401 = tl.broadcast_to(tmp400, [XBLOCK]) + tmp387 = tmp386 - tmp385 + tmp388 = 0.09999999999999998 + tmp389 = tmp387 * tmp388 + tmp390 = tmp385 + tmp389 + tmp392 = 0.999 + tmp393 = tmp391 * tmp392 + tmp394 = tmp386 * tmp386 + tmp395 = 0.0010000000000000009 + tmp396 = tmp394 * tmp395 + tmp397 = tmp393 + tmp396 + tmp399 = 
libdevice.sqrt(tmp397) + tmp402 = libdevice.pow(tmp392, tmp401) + tmp403 = 1.0 + tmp404 = tmp402 - tmp403 + tmp405 = -tmp404 + tmp406 = libdevice.sqrt(tmp405) + tmp407 = tmp399 / tmp406 + tmp408 = 1e-08 + tmp409 = tmp407 + tmp408 + tmp410 = 0.9 + tmp411 = libdevice.pow(tmp410, tmp401) + tmp412 = tmp411 - tmp403 + tmp413 = 100.0 + tmp414 = tmp412 * tmp413 + tmp415 = tl.full([1], 1, tl.int32) + tmp416 = tmp415 / tmp414 + tmp417 = tmp409 / tmp416 + tmp418 = tmp390 / tmp417 + tmp419 = tmp398 + tmp418 + tl.store(out_ptr44 + (x11), tmp390, None) + tl.store(out_ptr46 + (x11), tmp419, None) + tl.store(out_ptr47 + (x11), tmp397, None) + elif pid < num_xblocks_12: + pid_offset = pid - num_xblocks_11 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x12 = xindex + tmp420 = tl.load(in_ptr60 + (x12), xmask) + tmp421 = tl.load(in_ptr61 + (x12), xmask) + tmp426 = tl.load(in_ptr62 + (x12), xmask) + tmp433 = tl.load(in_ptr63 + (x12), xmask) + tmp435 = tl.load(in_ptr64 + (0)) + tmp436 = tl.broadcast_to(tmp435, [XBLOCK]) + tmp422 = tmp421 - tmp420 + tmp423 = 0.09999999999999998 + tmp424 = tmp422 * tmp423 + tmp425 = tmp420 + tmp424 + tmp427 = 0.999 + tmp428 = tmp426 * tmp427 + tmp429 = tmp421 * tmp421 + tmp430 = 0.0010000000000000009 + tmp431 = tmp429 * tmp430 + tmp432 = tmp428 + tmp431 + tmp434 = libdevice.sqrt(tmp432) + tmp437 = libdevice.pow(tmp427, tmp436) + tmp438 = 1.0 + tmp439 = tmp437 - tmp438 + tmp440 = -tmp439 + tmp441 = libdevice.sqrt(tmp440) + tmp442 = tmp434 / tmp441 + tmp443 = 1e-08 + tmp444 = tmp442 + tmp443 + tmp445 = 0.9 + tmp446 = libdevice.pow(tmp445, tmp436) + tmp447 = tmp446 - tmp438 + tmp448 = 100.0 + tmp449 = tmp447 * tmp448 + tmp450 = tl.full([1], 1, tl.int32) + tmp451 = tmp450 / tmp449 + tmp452 = tmp444 / tmp451 + tmp453 = tmp425 / tmp452 + tmp454 = tmp433 + tmp453 + tl.store(out_ptr48 + (x12), tmp425, xmask) + tl.store(out_ptr50 + (x12), tmp454, xmask) + tl.store(out_ptr51 + (x12), tmp432, xmask) + elif pid < num_xblocks_13: + pid_offset = pid - num_xblocks_12 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x13 = xindex + tmp455 = tl.load(in_ptr65 + (x13), xmask) + tmp456 = tl.load(in_ptr66 + (x13), xmask) + tmp461 = tl.load(in_ptr67 + (x13), xmask) + tmp468 = tl.load(in_ptr68 + (x13), xmask) + tmp470 = tl.load(in_ptr69 + (0)) + tmp471 = tl.broadcast_to(tmp470, [XBLOCK]) + tmp457 = tmp456 - tmp455 + tmp458 = 0.09999999999999998 + tmp459 = tmp457 * tmp458 + tmp460 = tmp455 + tmp459 + tmp462 = 0.999 + tmp463 = tmp461 * tmp462 + tmp464 = tmp456 * tmp456 + tmp465 = 0.0010000000000000009 + tmp466 = tmp464 * tmp465 + tmp467 = tmp463 + tmp466 + tmp469 = libdevice.sqrt(tmp467) + tmp472 = libdevice.pow(tmp462, tmp471) + tmp473 = 1.0 + tmp474 = tmp472 - tmp473 + tmp475 = -tmp474 + tmp476 = libdevice.sqrt(tmp475) + tmp477 = tmp469 / tmp476 + tmp478 = 1e-08 + tmp479 = tmp477 + tmp478 + tmp480 = 0.9 + tmp481 = libdevice.pow(tmp480, tmp471) + tmp482 = tmp481 - tmp473 + tmp483 = 100.0 + tmp484 = tmp482 * tmp483 + tmp485 = tl.full([1], 1, tl.int32) + tmp486 = tmp485 / tmp484 + tmp487 = tmp479 / tmp486 + tmp488 = tmp460 / tmp487 + tmp489 = tmp468 + tmp488 + tl.store(out_ptr52 + (x13), tmp460, xmask) + tl.store(out_ptr54 + (x13), tmp489, xmask) + tl.store(out_ptr55 + (x13), tmp467, xmask) + elif pid < num_xblocks_14: + pid_offset = pid - num_xblocks_13 + xnumel = 768 + rnumel = 1 + xoffset = pid_offset * XBLOCK + xindex = xoffset + 
tl.arange(0, XBLOCK)[:] + xmask = xindex < xnumel + x14 = xindex + tmp490 = tl.load(in_ptr70 + (x14), xmask) + tmp491 = tl.load(in_ptr71 + (x14), xmask) + tmp496 = tl.load(in_ptr72 + (x14), xmask) + tmp503 = tl.load(in_ptr73 + (x14), xmask) + tmp505 = tl.load(in_ptr74 + (0)) + tmp506 = tl.broadcast_to(tmp505, [XBLOCK]) + tmp492 = tmp491 - tmp490 + tmp493 = 0.09999999999999998 + tmp494 = tmp492 * tmp493 + tmp495 = tmp490 + tmp494 + tmp497 = 0.999 + tmp498 = tmp496 * tmp497 + tmp499 = tmp491 * tmp491 + tmp500 = 0.0010000000000000009 + tmp501 = tmp499 * tmp500 + tmp502 = tmp498 + tmp501 + tmp504 = libdevice.sqrt(tmp502) + tmp507 = libdevice.pow(tmp497, tmp506) + tmp508 = 1.0 + tmp509 = tmp507 - tmp508 + tmp510 = -tmp509 + tmp511 = libdevice.sqrt(tmp510) + tmp512 = tmp504 / tmp511 + tmp513 = 1e-08 + tmp514 = tmp512 + tmp513 + tmp515 = 0.9 + tmp516 = libdevice.pow(tmp515, tmp506) + tmp517 = tmp516 - tmp508 + tmp518 = 100.0 + tmp519 = tmp517 * tmp518 + tmp520 = tl.full([1], 1, tl.int32) + tmp521 = tmp520 / tmp519 + tmp522 = tmp514 / tmp521 + tmp523 = tmp495 / tmp522 + tmp524 = tmp503 + tmp523 + tl.store(out_ptr56 + (x14), tmp495, xmask) + tl.store(out_ptr58 + (x14), tmp524, xmask) + tl.store(out_ptr59 + (x14), tmp502, xmask) + else: + pass + ''', device_str='cuda') + + + async_compile.wait(globals()) + del async_compile + + def call(args): + arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, 
arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, 
arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1 = args + args.clear() + assert_size_stride(arg0_1, (50304, 768), (768, 1)) + assert_size_stride(arg1_1, (1024, 768), (768, 1)) + assert_size_stride(arg2_1, (768, ), (1, )) + assert_size_stride(arg3_1, (768, ), (1, )) + assert_size_stride(arg4_1, (2304, 768), (768, 1)) + assert_size_stride(arg5_1, (2304, ), (1, )) + assert_size_stride(arg6_1, (768, 768), (768, 1)) + assert_size_stride(arg7_1, (768, ), (1, )) + assert_size_stride(arg8_1, (768, ), (1, )) + assert_size_stride(arg9_1, (768, ), (1, )) + assert_size_stride(arg10_1, (3072, 768), (768, 1)) + assert_size_stride(arg11_1, (3072, ), (1, )) + assert_size_stride(arg12_1, (768, 3072), (3072, 1)) + assert_size_stride(arg13_1, (768, ), (1, )) + assert_size_stride(arg14_1, (768, ), (1, )) + assert_size_stride(arg15_1, (768, ), (1, )) + assert_size_stride(arg16_1, (2304, 768), (768, 1)) + assert_size_stride(arg17_1, (2304, ), (1, )) + assert_size_stride(arg18_1, (768, 768), (768, 1)) + assert_size_stride(arg19_1, (768, ), (1, )) + assert_size_stride(arg20_1, (768, ), (1, )) + assert_size_stride(arg21_1, (768, ), (1, )) + assert_size_stride(arg22_1, (3072, 768), (768, 1)) + assert_size_stride(arg23_1, (3072, ), (1, )) + assert_size_stride(arg24_1, (768, 3072), (3072, 1)) + assert_size_stride(arg25_1, (768, ), (1, )) + assert_size_stride(arg26_1, (768, ), (1, )) + assert_size_stride(arg27_1, (768, ), (1, )) + assert_size_stride(arg28_1, (2304, 768), (768, 1)) + assert_size_stride(arg29_1, (2304, ), (1, )) + assert_size_stride(arg30_1, (768, 768), (768, 1)) + assert_size_stride(arg31_1, (768, ), (1, )) + assert_size_stride(arg32_1, (768, ), (1, )) + assert_size_stride(arg33_1, (768, ), (1, )) + assert_size_stride(arg34_1, (3072, 768), (768, 1)) + assert_size_stride(arg35_1, (3072, ), (1, )) + assert_size_stride(arg36_1, (768, 3072), (3072, 1)) + assert_size_stride(arg37_1, (768, ), (1, )) + assert_size_stride(arg38_1, (768, ), (1, )) + assert_size_stride(arg39_1, (768, ), (1, )) + assert_size_stride(arg40_1, (2304, 768), (768, 
1)) + assert_size_stride(arg41_1, (2304, ), (1, )) + assert_size_stride(arg42_1, (768, 768), (768, 1)) + assert_size_stride(arg43_1, (768, ), (1, )) + assert_size_stride(arg44_1, (768, ), (1, )) + assert_size_stride(arg45_1, (768, ), (1, )) + assert_size_stride(arg46_1, (3072, 768), (768, 1)) + assert_size_stride(arg47_1, (3072, ), (1, )) + assert_size_stride(arg48_1, (768, 3072), (3072, 1)) + assert_size_stride(arg49_1, (768, ), (1, )) + assert_size_stride(arg50_1, (768, ), (1, )) + assert_size_stride(arg51_1, (768, ), (1, )) + assert_size_stride(arg52_1, (2304, 768), (768, 1)) + assert_size_stride(arg53_1, (2304, ), (1, )) + assert_size_stride(arg54_1, (768, 768), (768, 1)) + assert_size_stride(arg55_1, (768, ), (1, )) + assert_size_stride(arg56_1, (768, ), (1, )) + assert_size_stride(arg57_1, (768, ), (1, )) + assert_size_stride(arg58_1, (3072, 768), (768, 1)) + assert_size_stride(arg59_1, (3072, ), (1, )) + assert_size_stride(arg60_1, (768, 3072), (3072, 1)) + assert_size_stride(arg61_1, (768, ), (1, )) + assert_size_stride(arg62_1, (768, ), (1, )) + assert_size_stride(arg63_1, (768, ), (1, )) + assert_size_stride(arg64_1, (2304, 768), (768, 1)) + assert_size_stride(arg65_1, (2304, ), (1, )) + assert_size_stride(arg66_1, (768, 768), (768, 1)) + assert_size_stride(arg67_1, (768, ), (1, )) + assert_size_stride(arg68_1, (768, ), (1, )) + assert_size_stride(arg69_1, (768, ), (1, )) + assert_size_stride(arg70_1, (3072, 768), (768, 1)) + assert_size_stride(arg71_1, (3072, ), (1, )) + assert_size_stride(arg72_1, (768, 3072), (3072, 1)) + assert_size_stride(arg73_1, (768, ), (1, )) + assert_size_stride(arg74_1, (768, ), (1, )) + assert_size_stride(arg75_1, (768, ), (1, )) + assert_size_stride(arg76_1, (2304, 768), (768, 1)) + assert_size_stride(arg77_1, (2304, ), (1, )) + assert_size_stride(arg78_1, (768, 768), (768, 1)) + assert_size_stride(arg79_1, (768, ), (1, )) + assert_size_stride(arg80_1, (768, ), (1, )) + assert_size_stride(arg81_1, (768, ), (1, )) + assert_size_stride(arg82_1, (3072, 768), (768, 1)) + assert_size_stride(arg83_1, (3072, ), (1, )) + assert_size_stride(arg84_1, (768, 3072), (3072, 1)) + assert_size_stride(arg85_1, (768, ), (1, )) + assert_size_stride(arg86_1, (768, ), (1, )) + assert_size_stride(arg87_1, (768, ), (1, )) + assert_size_stride(arg88_1, (2304, 768), (768, 1)) + assert_size_stride(arg89_1, (2304, ), (1, )) + assert_size_stride(arg90_1, (768, 768), (768, 1)) + assert_size_stride(arg91_1, (768, ), (1, )) + assert_size_stride(arg92_1, (768, ), (1, )) + assert_size_stride(arg93_1, (768, ), (1, )) + assert_size_stride(arg94_1, (3072, 768), (768, 1)) + assert_size_stride(arg95_1, (3072, ), (1, )) + assert_size_stride(arg96_1, (768, 3072), (3072, 1)) + assert_size_stride(arg97_1, (768, ), (1, )) + assert_size_stride(arg98_1, (768, ), (1, )) + assert_size_stride(arg99_1, (768, ), (1, )) + assert_size_stride(arg100_1, (2304, 768), (768, 1)) + assert_size_stride(arg101_1, (2304, ), (1, )) + assert_size_stride(arg102_1, (768, 768), (768, 1)) + assert_size_stride(arg103_1, (768, ), (1, )) + assert_size_stride(arg104_1, (768, ), (1, )) + assert_size_stride(arg105_1, (768, ), (1, )) + assert_size_stride(arg106_1, (3072, 768), (768, 1)) + assert_size_stride(arg107_1, (3072, ), (1, )) + assert_size_stride(arg108_1, (768, 3072), (3072, 1)) + assert_size_stride(arg109_1, (768, ), (1, )) + assert_size_stride(arg110_1, (768, ), (1, )) + assert_size_stride(arg111_1, (768, ), (1, )) + assert_size_stride(arg112_1, (2304, 768), (768, 1)) + assert_size_stride(arg113_1, (2304, ), (1, 
)) + assert_size_stride(arg114_1, (768, 768), (768, 1)) + assert_size_stride(arg115_1, (768, ), (1, )) + assert_size_stride(arg116_1, (768, ), (1, )) + assert_size_stride(arg117_1, (768, ), (1, )) + assert_size_stride(arg118_1, (3072, 768), (768, 1)) + assert_size_stride(arg119_1, (3072, ), (1, )) + assert_size_stride(arg120_1, (768, 3072), (3072, 1)) + assert_size_stride(arg121_1, (768, ), (1, )) + assert_size_stride(arg122_1, (768, ), (1, )) + assert_size_stride(arg123_1, (768, ), (1, )) + assert_size_stride(arg124_1, (2304, 768), (768, 1)) + assert_size_stride(arg125_1, (2304, ), (1, )) + assert_size_stride(arg126_1, (768, 768), (768, 1)) + assert_size_stride(arg127_1, (768, ), (1, )) + assert_size_stride(arg128_1, (768, ), (1, )) + assert_size_stride(arg129_1, (768, ), (1, )) + assert_size_stride(arg130_1, (3072, 768), (768, 1)) + assert_size_stride(arg131_1, (3072, ), (1, )) + assert_size_stride(arg132_1, (768, 3072), (3072, 1)) + assert_size_stride(arg133_1, (768, ), (1, )) + assert_size_stride(arg134_1, (768, ), (1, )) + assert_size_stride(arg135_1, (768, ), (1, )) + assert_size_stride(arg136_1, (2304, 768), (768, 1)) + assert_size_stride(arg137_1, (2304, ), (1, )) + assert_size_stride(arg138_1, (768, 768), (768, 1)) + assert_size_stride(arg139_1, (768, ), (1, )) + assert_size_stride(arg140_1, (768, ), (1, )) + assert_size_stride(arg141_1, (768, ), (1, )) + assert_size_stride(arg142_1, (3072, 768), (768, 1)) + assert_size_stride(arg143_1, (3072, ), (1, )) + assert_size_stride(arg144_1, (768, 3072), (3072, 1)) + assert_size_stride(arg145_1, (768, ), (1, )) + assert_size_stride(arg146_1, (768, ), (1, )) + assert_size_stride(arg147_1, (768, ), (1, )) + assert_size_stride(arg148_1, (), ()) + assert_size_stride(arg149_1, (1024, 768), (768, 1)) + assert_size_stride(arg150_1, (1024, 768), (768, 1)) + assert_size_stride(arg151_1, (50304, 768), (768, 1)) + assert_size_stride(arg152_1, (1024, 768), (768, 1)) + assert_size_stride(arg153_1, (768, ), (1, )) + assert_size_stride(arg154_1, (768, ), (1, )) + assert_size_stride(arg155_1, (2304, 768), (768, 1)) + assert_size_stride(arg156_1, (2304, ), (1, )) + assert_size_stride(arg157_1, (768, 768), (768, 1)) + assert_size_stride(arg158_1, (768, ), (1, )) + assert_size_stride(arg159_1, (768, ), (1, )) + assert_size_stride(arg160_1, (768, ), (1, )) + assert_size_stride(arg161_1, (3072, 768), (768, 1)) + assert_size_stride(arg162_1, (3072, ), (1, )) + assert_size_stride(arg163_1, (768, 3072), (3072, 1)) + assert_size_stride(arg164_1, (768, ), (1, )) + assert_size_stride(arg165_1, (768, ), (1, )) + assert_size_stride(arg166_1, (768, ), (1, )) + assert_size_stride(arg167_1, (2304, 768), (768, 1)) + assert_size_stride(arg168_1, (2304, ), (1, )) + assert_size_stride(arg169_1, (768, 768), (768, 1)) + assert_size_stride(arg170_1, (768, ), (1, )) + assert_size_stride(arg171_1, (768, ), (1, )) + assert_size_stride(arg172_1, (768, ), (1, )) + assert_size_stride(arg173_1, (3072, 768), (768, 1)) + assert_size_stride(arg174_1, (3072, ), (1, )) + assert_size_stride(arg175_1, (768, 3072), (3072, 1)) + assert_size_stride(arg176_1, (768, ), (1, )) + assert_size_stride(arg177_1, (768, ), (1, )) + assert_size_stride(arg178_1, (768, ), (1, )) + assert_size_stride(arg179_1, (2304, 768), (768, 1)) + assert_size_stride(arg180_1, (2304, ), (1, )) + assert_size_stride(arg181_1, (768, 768), (768, 1)) + assert_size_stride(arg182_1, (768, ), (1, )) + assert_size_stride(arg183_1, (768, ), (1, )) + assert_size_stride(arg184_1, (768, ), (1, )) + assert_size_stride(arg185_1, (3072, 
768), (768, 1)) + assert_size_stride(arg186_1, (3072, ), (1, )) + assert_size_stride(arg187_1, (768, 3072), (3072, 1)) + assert_size_stride(arg188_1, (768, ), (1, )) + assert_size_stride(arg189_1, (768, ), (1, )) + assert_size_stride(arg190_1, (768, ), (1, )) + assert_size_stride(arg191_1, (2304, 768), (768, 1)) + assert_size_stride(arg192_1, (2304, ), (1, )) + assert_size_stride(arg193_1, (768, 768), (768, 1)) + assert_size_stride(arg194_1, (768, ), (1, )) + assert_size_stride(arg195_1, (768, ), (1, )) + assert_size_stride(arg196_1, (768, ), (1, )) + assert_size_stride(arg197_1, (3072, 768), (768, 1)) + assert_size_stride(arg198_1, (3072, ), (1, )) + assert_size_stride(arg199_1, (768, 3072), (3072, 1)) + assert_size_stride(arg200_1, (768, ), (1, )) + assert_size_stride(arg201_1, (768, ), (1, )) + assert_size_stride(arg202_1, (768, ), (1, )) + assert_size_stride(arg203_1, (2304, 768), (768, 1)) + assert_size_stride(arg204_1, (2304, ), (1, )) + assert_size_stride(arg205_1, (768, 768), (768, 1)) + assert_size_stride(arg206_1, (768, ), (1, )) + assert_size_stride(arg207_1, (768, ), (1, )) + assert_size_stride(arg208_1, (768, ), (1, )) + assert_size_stride(arg209_1, (3072, 768), (768, 1)) + assert_size_stride(arg210_1, (3072, ), (1, )) + assert_size_stride(arg211_1, (768, 3072), (3072, 1)) + assert_size_stride(arg212_1, (768, ), (1, )) + assert_size_stride(arg213_1, (768, ), (1, )) + assert_size_stride(arg214_1, (768, ), (1, )) + assert_size_stride(arg215_1, (2304, 768), (768, 1)) + assert_size_stride(arg216_1, (2304, ), (1, )) + assert_size_stride(arg217_1, (768, 768), (768, 1)) + assert_size_stride(arg218_1, (768, ), (1, )) + assert_size_stride(arg219_1, (768, ), (1, )) + assert_size_stride(arg220_1, (768, ), (1, )) + assert_size_stride(arg221_1, (3072, 768), (768, 1)) + assert_size_stride(arg222_1, (3072, ), (1, )) + assert_size_stride(arg223_1, (768, 3072), (3072, 1)) + assert_size_stride(arg224_1, (768, ), (1, )) + assert_size_stride(arg225_1, (768, ), (1, )) + assert_size_stride(arg226_1, (768, ), (1, )) + assert_size_stride(arg227_1, (2304, 768), (768, 1)) + assert_size_stride(arg228_1, (2304, ), (1, )) + assert_size_stride(arg229_1, (768, 768), (768, 1)) + assert_size_stride(arg230_1, (768, ), (1, )) + assert_size_stride(arg231_1, (768, ), (1, )) + assert_size_stride(arg232_1, (768, ), (1, )) + assert_size_stride(arg233_1, (3072, 768), (768, 1)) + assert_size_stride(arg234_1, (3072, ), (1, )) + assert_size_stride(arg235_1, (768, 3072), (3072, 1)) + assert_size_stride(arg236_1, (768, ), (1, )) + assert_size_stride(arg237_1, (768, ), (1, )) + assert_size_stride(arg238_1, (768, ), (1, )) + assert_size_stride(arg239_1, (2304, 768), (768, 1)) + assert_size_stride(arg240_1, (2304, ), (1, )) + assert_size_stride(arg241_1, (768, 768), (768, 1)) + assert_size_stride(arg242_1, (768, ), (1, )) + assert_size_stride(arg243_1, (768, ), (1, )) + assert_size_stride(arg244_1, (768, ), (1, )) + assert_size_stride(arg245_1, (3072, 768), (768, 1)) + assert_size_stride(arg246_1, (3072, ), (1, )) + assert_size_stride(arg247_1, (768, 3072), (3072, 1)) + assert_size_stride(arg248_1, (768, ), (1, )) + assert_size_stride(arg249_1, (768, ), (1, )) + assert_size_stride(arg250_1, (768, ), (1, )) + assert_size_stride(arg251_1, (2304, 768), (768, 1)) + assert_size_stride(arg252_1, (2304, ), (1, )) + assert_size_stride(arg253_1, (768, 768), (768, 1)) + assert_size_stride(arg254_1, (768, ), (1, )) + assert_size_stride(arg255_1, (768, ), (1, )) + assert_size_stride(arg256_1, (768, ), (1, )) + 
assert_size_stride(arg257_1, (3072, 768), (768, 1)) + assert_size_stride(arg258_1, (3072, ), (1, )) + assert_size_stride(arg259_1, (768, 3072), (3072, 1)) + assert_size_stride(arg260_1, (768, ), (1, )) + assert_size_stride(arg261_1, (768, ), (1, )) + assert_size_stride(arg262_1, (768, ), (1, )) + assert_size_stride(arg263_1, (2304, 768), (768, 1)) + assert_size_stride(arg264_1, (2304, ), (1, )) + assert_size_stride(arg265_1, (768, 768), (768, 1)) + assert_size_stride(arg266_1, (768, ), (1, )) + assert_size_stride(arg267_1, (768, ), (1, )) + assert_size_stride(arg268_1, (768, ), (1, )) + assert_size_stride(arg269_1, (3072, 768), (768, 1)) + assert_size_stride(arg270_1, (3072, ), (1, )) + assert_size_stride(arg271_1, (768, 3072), (3072, 1)) + assert_size_stride(arg272_1, (768, ), (1, )) + assert_size_stride(arg273_1, (768, ), (1, )) + assert_size_stride(arg274_1, (768, ), (1, )) + assert_size_stride(arg275_1, (2304, 768), (768, 1)) + assert_size_stride(arg276_1, (2304, ), (1, )) + assert_size_stride(arg277_1, (768, 768), (768, 1)) + assert_size_stride(arg278_1, (768, ), (1, )) + assert_size_stride(arg279_1, (768, ), (1, )) + assert_size_stride(arg280_1, (768, ), (1, )) + assert_size_stride(arg281_1, (3072, 768), (768, 1)) + assert_size_stride(arg282_1, (3072, ), (1, )) + assert_size_stride(arg283_1, (768, 3072), (3072, 1)) + assert_size_stride(arg284_1, (768, ), (1, )) + assert_size_stride(arg285_1, (768, ), (1, )) + assert_size_stride(arg286_1, (768, ), (1, )) + assert_size_stride(arg287_1, (2304, 768), (768, 1)) + assert_size_stride(arg288_1, (2304, ), (1, )) + assert_size_stride(arg289_1, (768, 768), (768, 1)) + assert_size_stride(arg290_1, (768, ), (1, )) + assert_size_stride(arg291_1, (768, ), (1, )) + assert_size_stride(arg292_1, (768, ), (1, )) + assert_size_stride(arg293_1, (3072, 768), (768, 1)) + assert_size_stride(arg294_1, (3072, ), (1, )) + assert_size_stride(arg295_1, (768, 3072), (3072, 1)) + assert_size_stride(arg296_1, (768, ), (1, )) + assert_size_stride(arg297_1, (768, ), (1, )) + assert_size_stride(arg298_1, (768, ), (1, )) + assert_size_stride(arg299_1, (50304, 768), (768, 1)) + assert_size_stride(arg300_1, (768, ), (1, )) + assert_size_stride(arg301_1, (768, ), (1, )) + assert_size_stride(arg302_1, (2304, 768), (768, 1)) + assert_size_stride(arg303_1, (2304, ), (1, )) + assert_size_stride(arg304_1, (768, 768), (768, 1)) + assert_size_stride(arg305_1, (768, ), (1, )) + assert_size_stride(arg306_1, (768, ), (1, )) + assert_size_stride(arg307_1, (768, ), (1, )) + assert_size_stride(arg308_1, (3072, 768), (768, 1)) + assert_size_stride(arg309_1, (3072, ), (1, )) + assert_size_stride(arg310_1, (768, 3072), (3072, 1)) + assert_size_stride(arg311_1, (768, ), (1, )) + assert_size_stride(arg312_1, (768, ), (1, )) + assert_size_stride(arg313_1, (768, ), (1, )) + assert_size_stride(arg314_1, (2304, 768), (768, 1)) + assert_size_stride(arg315_1, (2304, ), (1, )) + assert_size_stride(arg316_1, (768, 768), (768, 1)) + assert_size_stride(arg317_1, (768, ), (1, )) + assert_size_stride(arg318_1, (768, ), (1, )) + assert_size_stride(arg319_1, (768, ), (1, )) + assert_size_stride(arg320_1, (3072, 768), (768, 1)) + assert_size_stride(arg321_1, (3072, ), (1, )) + assert_size_stride(arg322_1, (768, 3072), (3072, 1)) + assert_size_stride(arg323_1, (768, ), (1, )) + assert_size_stride(arg324_1, (768, ), (1, )) + assert_size_stride(arg325_1, (768, ), (1, )) + assert_size_stride(arg326_1, (2304, 768), (768, 1)) + assert_size_stride(arg327_1, (2304, ), (1, )) + assert_size_stride(arg328_1, (768, 
768), (768, 1)) + assert_size_stride(arg329_1, (768, ), (1, )) + assert_size_stride(arg330_1, (768, ), (1, )) + assert_size_stride(arg331_1, (768, ), (1, )) + assert_size_stride(arg332_1, (3072, 768), (768, 1)) + assert_size_stride(arg333_1, (3072, ), (1, )) + assert_size_stride(arg334_1, (768, 3072), (3072, 1)) + assert_size_stride(arg335_1, (768, ), (1, )) + assert_size_stride(arg336_1, (768, ), (1, )) + assert_size_stride(arg337_1, (768, ), (1, )) + assert_size_stride(arg338_1, (2304, 768), (768, 1)) + assert_size_stride(arg339_1, (2304, ), (1, )) + assert_size_stride(arg340_1, (768, 768), (768, 1)) + assert_size_stride(arg341_1, (768, ), (1, )) + assert_size_stride(arg342_1, (768, ), (1, )) + assert_size_stride(arg343_1, (768, ), (1, )) + assert_size_stride(arg344_1, (3072, 768), (768, 1)) + assert_size_stride(arg345_1, (3072, ), (1, )) + assert_size_stride(arg346_1, (768, 3072), (3072, 1)) + assert_size_stride(arg347_1, (768, ), (1, )) + assert_size_stride(arg348_1, (768, ), (1, )) + assert_size_stride(arg349_1, (768, ), (1, )) + assert_size_stride(arg350_1, (2304, 768), (768, 1)) + assert_size_stride(arg351_1, (2304, ), (1, )) + assert_size_stride(arg352_1, (768, 768), (768, 1)) + assert_size_stride(arg353_1, (768, ), (1, )) + assert_size_stride(arg354_1, (768, ), (1, )) + assert_size_stride(arg355_1, (768, ), (1, )) + assert_size_stride(arg356_1, (3072, 768), (768, 1)) + assert_size_stride(arg357_1, (3072, ), (1, )) + assert_size_stride(arg358_1, (768, 3072), (3072, 1)) + assert_size_stride(arg359_1, (768, ), (1, )) + assert_size_stride(arg360_1, (768, ), (1, )) + assert_size_stride(arg361_1, (768, ), (1, )) + assert_size_stride(arg362_1, (2304, 768), (768, 1)) + assert_size_stride(arg363_1, (2304, ), (1, )) + assert_size_stride(arg364_1, (768, 768), (768, 1)) + assert_size_stride(arg365_1, (768, ), (1, )) + assert_size_stride(arg366_1, (768, ), (1, )) + assert_size_stride(arg367_1, (768, ), (1, )) + assert_size_stride(arg368_1, (3072, 768), (768, 1)) + assert_size_stride(arg369_1, (3072, ), (1, )) + assert_size_stride(arg370_1, (768, 3072), (3072, 1)) + assert_size_stride(arg371_1, (768, ), (1, )) + assert_size_stride(arg372_1, (768, ), (1, )) + assert_size_stride(arg373_1, (768, ), (1, )) + assert_size_stride(arg374_1, (2304, 768), (768, 1)) + assert_size_stride(arg375_1, (2304, ), (1, )) + assert_size_stride(arg376_1, (768, 768), (768, 1)) + assert_size_stride(arg377_1, (768, ), (1, )) + assert_size_stride(arg378_1, (768, ), (1, )) + assert_size_stride(arg379_1, (768, ), (1, )) + assert_size_stride(arg380_1, (3072, 768), (768, 1)) + assert_size_stride(arg381_1, (3072, ), (1, )) + assert_size_stride(arg382_1, (768, 3072), (3072, 1)) + assert_size_stride(arg383_1, (768, ), (1, )) + assert_size_stride(arg384_1, (768, ), (1, )) + assert_size_stride(arg385_1, (768, ), (1, )) + assert_size_stride(arg386_1, (2304, 768), (768, 1)) + assert_size_stride(arg387_1, (2304, ), (1, )) + assert_size_stride(arg388_1, (768, 768), (768, 1)) + assert_size_stride(arg389_1, (768, ), (1, )) + assert_size_stride(arg390_1, (768, ), (1, )) + assert_size_stride(arg391_1, (768, ), (1, )) + assert_size_stride(arg392_1, (3072, 768), (768, 1)) + assert_size_stride(arg393_1, (3072, ), (1, )) + assert_size_stride(arg394_1, (768, 3072), (3072, 1)) + assert_size_stride(arg395_1, (768, ), (1, )) + assert_size_stride(arg396_1, (768, ), (1, )) + assert_size_stride(arg397_1, (768, ), (1, )) + assert_size_stride(arg398_1, (2304, 768), (768, 1)) + assert_size_stride(arg399_1, (2304, ), (1, )) + 
assert_size_stride(arg400_1, (768, 768), (768, 1)) + assert_size_stride(arg401_1, (768, ), (1, )) + assert_size_stride(arg402_1, (768, ), (1, )) + assert_size_stride(arg403_1, (768, ), (1, )) + assert_size_stride(arg404_1, (3072, 768), (768, 1)) + assert_size_stride(arg405_1, (3072, ), (1, )) + assert_size_stride(arg406_1, (768, 3072), (3072, 1)) + assert_size_stride(arg407_1, (768, ), (1, )) + assert_size_stride(arg408_1, (768, ), (1, )) + assert_size_stride(arg409_1, (768, ), (1, )) + assert_size_stride(arg410_1, (2304, 768), (768, 1)) + assert_size_stride(arg411_1, (2304, ), (1, )) + assert_size_stride(arg412_1, (768, 768), (768, 1)) + assert_size_stride(arg413_1, (768, ), (1, )) + assert_size_stride(arg414_1, (768, ), (1, )) + assert_size_stride(arg415_1, (768, ), (1, )) + assert_size_stride(arg416_1, (3072, 768), (768, 1)) + assert_size_stride(arg417_1, (3072, ), (1, )) + assert_size_stride(arg418_1, (768, 3072), (3072, 1)) + assert_size_stride(arg419_1, (768, ), (1, )) + assert_size_stride(arg420_1, (768, ), (1, )) + assert_size_stride(arg421_1, (768, ), (1, )) + assert_size_stride(arg422_1, (2304, 768), (768, 1)) + assert_size_stride(arg423_1, (2304, ), (1, )) + assert_size_stride(arg424_1, (768, 768), (768, 1)) + assert_size_stride(arg425_1, (768, ), (1, )) + assert_size_stride(arg426_1, (768, ), (1, )) + assert_size_stride(arg427_1, (768, ), (1, )) + assert_size_stride(arg428_1, (3072, 768), (768, 1)) + assert_size_stride(arg429_1, (3072, ), (1, )) + assert_size_stride(arg430_1, (768, 3072), (3072, 1)) + assert_size_stride(arg431_1, (768, ), (1, )) + assert_size_stride(arg432_1, (768, ), (1, )) + assert_size_stride(arg433_1, (768, ), (1, )) + assert_size_stride(arg434_1, (2304, 768), (768, 1)) + assert_size_stride(arg435_1, (2304, ), (1, )) + assert_size_stride(arg436_1, (768, 768), (768, 1)) + assert_size_stride(arg437_1, (768, ), (1, )) + assert_size_stride(arg438_1, (768, ), (1, )) + assert_size_stride(arg439_1, (768, ), (1, )) + assert_size_stride(arg440_1, (3072, 768), (768, 1)) + assert_size_stride(arg441_1, (3072, ), (1, )) + assert_size_stride(arg442_1, (768, 3072), (3072, 1)) + assert_size_stride(arg443_1, (768, ), (1, )) + assert_size_stride(arg444_1, (768, ), (1, )) + assert_size_stride(arg445_1, (768, ), (1, )) + assert_size_stride(arg446_1, (50304, 768), (768, 1)) + assert_size_stride(arg447_1, (768, ), (1, )) + assert_size_stride(arg448_1, (768, ), (1, )) + assert_size_stride(arg449_1, (2304, 768), (768, 1)) + assert_size_stride(arg450_1, (2304, ), (1, )) + assert_size_stride(arg451_1, (768, 768), (768, 1)) + assert_size_stride(arg452_1, (768, ), (1, )) + assert_size_stride(arg453_1, (768, ), (1, )) + assert_size_stride(arg454_1, (768, ), (1, )) + assert_size_stride(arg455_1, (3072, 768), (768, 1)) + assert_size_stride(arg456_1, (3072, ), (1, )) + assert_size_stride(arg457_1, (768, 3072), (3072, 1)) + assert_size_stride(arg458_1, (768, ), (1, )) + assert_size_stride(arg459_1, (768, ), (1, )) + assert_size_stride(arg460_1, (768, ), (1, )) + assert_size_stride(arg461_1, (2304, 768), (768, 1)) + assert_size_stride(arg462_1, (2304, ), (1, )) + assert_size_stride(arg463_1, (768, 768), (768, 1)) + assert_size_stride(arg464_1, (768, ), (1, )) + assert_size_stride(arg465_1, (768, ), (1, )) + assert_size_stride(arg466_1, (768, ), (1, )) + assert_size_stride(arg467_1, (3072, 768), (768, 1)) + assert_size_stride(arg468_1, (3072, ), (1, )) + assert_size_stride(arg469_1, (768, 3072), (3072, 1)) + assert_size_stride(arg470_1, (768, ), (1, )) + assert_size_stride(arg471_1, (768, ), 
(1, )) + assert_size_stride(arg472_1, (768, ), (1, )) + assert_size_stride(arg473_1, (2304, 768), (768, 1)) + assert_size_stride(arg474_1, (2304, ), (1, )) + assert_size_stride(arg475_1, (768, 768), (768, 1)) + assert_size_stride(arg476_1, (768, ), (1, )) + assert_size_stride(arg477_1, (768, ), (1, )) + assert_size_stride(arg478_1, (768, ), (1, )) + assert_size_stride(arg479_1, (3072, 768), (768, 1)) + assert_size_stride(arg480_1, (3072, ), (1, )) + assert_size_stride(arg481_1, (768, 3072), (3072, 1)) + assert_size_stride(arg482_1, (768, ), (1, )) + assert_size_stride(arg483_1, (768, ), (1, )) + assert_size_stride(arg484_1, (768, ), (1, )) + assert_size_stride(arg485_1, (2304, 768), (768, 1)) + assert_size_stride(arg486_1, (2304, ), (1, )) + assert_size_stride(arg487_1, (768, 768), (768, 1)) + assert_size_stride(arg488_1, (768, ), (1, )) + assert_size_stride(arg489_1, (768, ), (1, )) + assert_size_stride(arg490_1, (768, ), (1, )) + assert_size_stride(arg491_1, (3072, 768), (768, 1)) + assert_size_stride(arg492_1, (3072, ), (1, )) + assert_size_stride(arg493_1, (768, 3072), (3072, 1)) + assert_size_stride(arg494_1, (768, ), (1, )) + assert_size_stride(arg495_1, (768, ), (1, )) + assert_size_stride(arg496_1, (768, ), (1, )) + assert_size_stride(arg497_1, (2304, 768), (768, 1)) + assert_size_stride(arg498_1, (2304, ), (1, )) + assert_size_stride(arg499_1, (768, 768), (768, 1)) + assert_size_stride(arg500_1, (768, ), (1, )) + assert_size_stride(arg501_1, (768, ), (1, )) + assert_size_stride(arg502_1, (768, ), (1, )) + assert_size_stride(arg503_1, (3072, 768), (768, 1)) + assert_size_stride(arg504_1, (3072, ), (1, )) + assert_size_stride(arg505_1, (768, 3072), (3072, 1)) + assert_size_stride(arg506_1, (768, ), (1, )) + assert_size_stride(arg507_1, (768, ), (1, )) + assert_size_stride(arg508_1, (768, ), (1, )) + assert_size_stride(arg509_1, (2304, 768), (768, 1)) + assert_size_stride(arg510_1, (2304, ), (1, )) + assert_size_stride(arg511_1, (768, 768), (768, 1)) + assert_size_stride(arg512_1, (768, ), (1, )) + assert_size_stride(arg513_1, (768, ), (1, )) + assert_size_stride(arg514_1, (768, ), (1, )) + assert_size_stride(arg515_1, (3072, 768), (768, 1)) + assert_size_stride(arg516_1, (3072, ), (1, )) + assert_size_stride(arg517_1, (768, 3072), (3072, 1)) + assert_size_stride(arg518_1, (768, ), (1, )) + assert_size_stride(arg519_1, (768, ), (1, )) + assert_size_stride(arg520_1, (768, ), (1, )) + assert_size_stride(arg521_1, (2304, 768), (768, 1)) + assert_size_stride(arg522_1, (2304, ), (1, )) + assert_size_stride(arg523_1, (768, 768), (768, 1)) + assert_size_stride(arg524_1, (768, ), (1, )) + assert_size_stride(arg525_1, (768, ), (1, )) + assert_size_stride(arg526_1, (768, ), (1, )) + assert_size_stride(arg527_1, (3072, 768), (768, 1)) + assert_size_stride(arg528_1, (3072, ), (1, )) + assert_size_stride(arg529_1, (768, 3072), (3072, 1)) + assert_size_stride(arg530_1, (768, ), (1, )) + assert_size_stride(arg531_1, (768, ), (1, )) + assert_size_stride(arg532_1, (768, ), (1, )) + assert_size_stride(arg533_1, (2304, 768), (768, 1)) + assert_size_stride(arg534_1, (2304, ), (1, )) + assert_size_stride(arg535_1, (768, 768), (768, 1)) + assert_size_stride(arg536_1, (768, ), (1, )) + assert_size_stride(arg537_1, (768, ), (1, )) + assert_size_stride(arg538_1, (768, ), (1, )) + assert_size_stride(arg539_1, (3072, 768), (768, 1)) + assert_size_stride(arg540_1, (3072, ), (1, )) + assert_size_stride(arg541_1, (768, 3072), (3072, 1)) + assert_size_stride(arg542_1, (768, ), (1, )) + assert_size_stride(arg543_1, 
(768, ), (1, )) + assert_size_stride(arg544_1, (768, ), (1, )) + assert_size_stride(arg545_1, (2304, 768), (768, 1)) + assert_size_stride(arg546_1, (2304, ), (1, )) + assert_size_stride(arg547_1, (768, 768), (768, 1)) + assert_size_stride(arg548_1, (768, ), (1, )) + assert_size_stride(arg549_1, (768, ), (1, )) + assert_size_stride(arg550_1, (768, ), (1, )) + assert_size_stride(arg551_1, (3072, 768), (768, 1)) + assert_size_stride(arg552_1, (3072, ), (1, )) + assert_size_stride(arg553_1, (768, 3072), (3072, 1)) + assert_size_stride(arg554_1, (768, ), (1, )) + assert_size_stride(arg555_1, (768, ), (1, )) + assert_size_stride(arg556_1, (768, ), (1, )) + assert_size_stride(arg557_1, (2304, 768), (768, 1)) + assert_size_stride(arg558_1, (2304, ), (1, )) + assert_size_stride(arg559_1, (768, 768), (768, 1)) + assert_size_stride(arg560_1, (768, ), (1, )) + assert_size_stride(arg561_1, (768, ), (1, )) + assert_size_stride(arg562_1, (768, ), (1, )) + assert_size_stride(arg563_1, (3072, 768), (768, 1)) + assert_size_stride(arg564_1, (3072, ), (1, )) + assert_size_stride(arg565_1, (768, 3072), (3072, 1)) + assert_size_stride(arg566_1, (768, ), (1, )) + assert_size_stride(arg567_1, (768, ), (1, )) + assert_size_stride(arg568_1, (768, ), (1, )) + assert_size_stride(arg569_1, (2304, 768), (768, 1)) + assert_size_stride(arg570_1, (2304, ), (1, )) + assert_size_stride(arg571_1, (768, 768), (768, 1)) + assert_size_stride(arg572_1, (768, ), (1, )) + assert_size_stride(arg573_1, (768, ), (1, )) + assert_size_stride(arg574_1, (768, ), (1, )) + assert_size_stride(arg575_1, (3072, 768), (768, 1)) + assert_size_stride(arg576_1, (3072, ), (1, )) + assert_size_stride(arg577_1, (768, 3072), (3072, 1)) + assert_size_stride(arg578_1, (768, ), (1, )) + assert_size_stride(arg579_1, (768, ), (1, )) + assert_size_stride(arg580_1, (768, ), (1, )) + assert_size_stride(arg581_1, (2304, 768), (768, 1)) + assert_size_stride(arg582_1, (2304, ), (1, )) + assert_size_stride(arg583_1, (768, 768), (768, 1)) + assert_size_stride(arg584_1, (768, ), (1, )) + assert_size_stride(arg585_1, (768, ), (1, )) + assert_size_stride(arg586_1, (768, ), (1, )) + assert_size_stride(arg587_1, (3072, 768), (768, 1)) + assert_size_stride(arg588_1, (3072, ), (1, )) + assert_size_stride(arg589_1, (768, 3072), (3072, 1)) + assert_size_stride(arg590_1, (768, ), (1, )) + assert_size_stride(arg591_1, (768, ), (1, )) + assert_size_stride(arg592_1, (768, ), (1, )) + assert_size_stride(arg593_1, (), ()) + assert_size_stride(arg594_1, (), ()) + assert_size_stride(arg595_1, (), ()) + assert_size_stride(arg596_1, (), ()) + assert_size_stride(arg597_1, (), ()) + assert_size_stride(arg598_1, (), ()) + assert_size_stride(arg599_1, (), ()) + assert_size_stride(arg600_1, (), ()) + assert_size_stride(arg601_1, (), ()) + assert_size_stride(arg602_1, (), ()) + assert_size_stride(arg603_1, (), ()) + assert_size_stride(arg604_1, (), ()) + assert_size_stride(arg605_1, (), ()) + assert_size_stride(arg606_1, (), ()) + assert_size_stride(arg607_1, (), ()) + assert_size_stride(arg608_1, (), ()) + assert_size_stride(arg609_1, (), ()) + assert_size_stride(arg610_1, (), ()) + assert_size_stride(arg611_1, (), ()) + assert_size_stride(arg612_1, (), ()) + assert_size_stride(arg613_1, (), ()) + assert_size_stride(arg614_1, (), ()) + assert_size_stride(arg615_1, (), ()) + assert_size_stride(arg616_1, (), ()) + assert_size_stride(arg617_1, (), ()) + assert_size_stride(arg618_1, (), ()) + assert_size_stride(arg619_1, (), ()) + assert_size_stride(arg620_1, (), ()) + 
assert_size_stride(arg621_1, (), ()) + assert_size_stride(arg622_1, (), ()) + assert_size_stride(arg623_1, (), ()) + assert_size_stride(arg624_1, (), ()) + assert_size_stride(arg625_1, (), ()) + assert_size_stride(arg626_1, (), ()) + assert_size_stride(arg627_1, (), ()) + assert_size_stride(arg628_1, (), ()) + assert_size_stride(arg629_1, (), ()) + assert_size_stride(arg630_1, (), ()) + assert_size_stride(arg631_1, (), ()) + assert_size_stride(arg632_1, (), ()) + assert_size_stride(arg633_1, (), ()) + assert_size_stride(arg634_1, (), ()) + assert_size_stride(arg635_1, (), ()) + assert_size_stride(arg636_1, (), ()) + assert_size_stride(arg637_1, (), ()) + assert_size_stride(arg638_1, (), ()) + assert_size_stride(arg639_1, (), ()) + assert_size_stride(arg640_1, (), ()) + assert_size_stride(arg641_1, (), ()) + assert_size_stride(arg642_1, (), ()) + assert_size_stride(arg643_1, (), ()) + assert_size_stride(arg644_1, (), ()) + assert_size_stride(arg645_1, (), ()) + assert_size_stride(arg646_1, (), ()) + assert_size_stride(arg647_1, (), ()) + assert_size_stride(arg648_1, (), ()) + assert_size_stride(arg649_1, (), ()) + assert_size_stride(arg650_1, (), ()) + assert_size_stride(arg651_1, (), ()) + assert_size_stride(arg652_1, (), ()) + assert_size_stride(arg653_1, (), ()) + assert_size_stride(arg654_1, (), ()) + assert_size_stride(arg655_1, (), ()) + assert_size_stride(arg656_1, (), ()) + assert_size_stride(arg657_1, (), ()) + assert_size_stride(arg658_1, (), ()) + assert_size_stride(arg659_1, (), ()) + assert_size_stride(arg660_1, (), ()) + assert_size_stride(arg661_1, (), ()) + assert_size_stride(arg662_1, (), ()) + assert_size_stride(arg663_1, (), ()) + assert_size_stride(arg664_1, (), ()) + assert_size_stride(arg665_1, (), ()) + assert_size_stride(arg666_1, (), ()) + assert_size_stride(arg667_1, (), ()) + assert_size_stride(arg668_1, (), ()) + assert_size_stride(arg669_1, (), ()) + assert_size_stride(arg670_1, (), ()) + assert_size_stride(arg671_1, (), ()) + assert_size_stride(arg672_1, (), ()) + assert_size_stride(arg673_1, (), ()) + assert_size_stride(arg674_1, (), ()) + assert_size_stride(arg675_1, (), ()) + assert_size_stride(arg676_1, (), ()) + assert_size_stride(arg677_1, (), ()) + assert_size_stride(arg678_1, (), ()) + assert_size_stride(arg679_1, (), ()) + assert_size_stride(arg680_1, (), ()) + assert_size_stride(arg681_1, (), ()) + assert_size_stride(arg682_1, (), ()) + assert_size_stride(arg683_1, (), ()) + assert_size_stride(arg684_1, (), ()) + assert_size_stride(arg685_1, (), ()) + assert_size_stride(arg686_1, (), ()) + assert_size_stride(arg687_1, (), ()) + assert_size_stride(arg688_1, (), ()) + assert_size_stride(arg689_1, (), ()) + assert_size_stride(arg690_1, (), ()) + assert_size_stride(arg691_1, (), ()) + assert_size_stride(arg692_1, (), ()) + assert_size_stride(arg693_1, (), ()) + assert_size_stride(arg694_1, (), ()) + assert_size_stride(arg695_1, (), ()) + assert_size_stride(arg696_1, (), ()) + assert_size_stride(arg697_1, (), ()) + assert_size_stride(arg698_1, (), ()) + assert_size_stride(arg699_1, (), ()) + assert_size_stride(arg700_1, (), ()) + assert_size_stride(arg701_1, (), ()) + assert_size_stride(arg702_1, (), ()) + assert_size_stride(arg703_1, (), ()) + assert_size_stride(arg704_1, (), ()) + assert_size_stride(arg705_1, (), ()) + assert_size_stride(arg706_1, (), ()) + assert_size_stride(arg707_1, (), ()) + assert_size_stride(arg708_1, (), ()) + assert_size_stride(arg709_1, (), ()) + assert_size_stride(arg710_1, (), ()) + assert_size_stride(arg711_1, (), ()) + 
assert_size_stride(arg712_1, (), ()) + assert_size_stride(arg713_1, (), ()) + assert_size_stride(arg714_1, (), ()) + assert_size_stride(arg715_1, (), ()) + assert_size_stride(arg716_1, (), ()) + assert_size_stride(arg717_1, (), ()) + assert_size_stride(arg718_1, (), ()) + assert_size_stride(arg719_1, (), ()) + assert_size_stride(arg720_1, (), ()) + assert_size_stride(arg721_1, (), ()) + assert_size_stride(arg722_1, (), ()) + assert_size_stride(arg723_1, (), ()) + assert_size_stride(arg724_1, (), ()) + assert_size_stride(arg725_1, (), ()) + assert_size_stride(arg726_1, (), ()) + assert_size_stride(arg727_1, (), ()) + assert_size_stride(arg728_1, (), ()) + assert_size_stride(arg729_1, (), ()) + assert_size_stride(arg730_1, (), ()) + assert_size_stride(arg731_1, (), ()) + assert_size_stride(arg732_1, (), ()) + assert_size_stride(arg733_1, (), ()) + assert_size_stride(arg734_1, (), ()) + assert_size_stride(arg735_1, (), ()) + assert_size_stride(arg736_1, (), ()) + assert_size_stride(arg737_1, (), ()) + assert_size_stride(arg738_1, (), ()) + assert_size_stride(arg739_1, (), ()) + with torch.cuda._DeviceGuard(0): + torch.cuda.set_device(0) + # Source Nodes: [], Original ATen: [] + stream0 = get_raw_stream(0) + triton_for_fused_0.run(arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, grid=(83, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_1.run(arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, 
arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1, grid=(65, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_2.run(arg299_1, arg151_1, arg446_1, arg0_1, arg593_1, arg149_1, arg152_1, arg150_1, arg1_1, arg148_1, arg300_1, arg153_1, arg447_1, arg2_1, arg594_1, arg301_1, arg154_1, arg448_1, arg3_1, arg595_1, arg302_1, arg155_1, arg449_1, arg4_1, arg596_1, arg303_1, arg156_1, arg450_1, arg5_1, arg597_1, arg304_1, arg157_1, arg451_1, arg6_1, arg598_1, arg305_1, arg158_1, arg452_1, arg7_1, arg599_1, arg306_1, arg159_1, arg453_1, arg8_1, arg600_1, arg307_1, arg160_1, arg454_1, arg9_1, arg601_1, arg308_1, arg161_1, arg455_1, arg10_1, arg602_1, arg309_1, arg162_1, arg456_1, arg11_1, arg603_1, arg310_1, arg163_1, arg457_1, arg12_1, arg604_1, arg311_1, arg164_1, arg458_1, arg13_1, arg605_1, arg312_1, arg165_1, arg459_1, arg14_1, arg606_1, arg313_1, arg166_1, arg460_1, arg15_1, arg607_1, arg314_1, arg167_1, arg461_1, arg16_1, arg608_1, arg315_1, arg168_1, arg462_1, arg17_1, arg609_1, arg316_1, arg169_1, arg463_1, arg18_1, arg610_1, arg299_1, arg0_1, arg446_1, arg149_1, arg1_1, arg150_1, arg300_1, arg2_1, arg447_1, arg301_1, arg3_1, arg448_1, arg302_1, arg4_1, arg449_1, arg303_1, arg5_1, arg450_1, arg304_1, arg6_1, arg451_1, arg305_1, arg7_1, arg452_1, arg306_1, arg8_1, arg453_1, arg307_1, arg9_1, arg454_1, arg308_1, arg10_1, arg455_1, arg309_1, arg11_1, arg456_1, arg310_1, arg12_1, arg457_1, arg311_1, arg13_1, arg458_1, arg312_1, arg14_1, arg459_1, arg313_1, arg15_1, arg460_1, arg314_1, arg16_1, arg461_1, arg315_1, arg17_1, arg462_1, arg316_1, arg18_1, arg463_1, grid=(47729, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_3.run(arg317_1, arg170_1, arg464_1, arg19_1, arg611_1, arg318_1, arg171_1, arg465_1, arg20_1, arg612_1, arg319_1, arg172_1, arg466_1, arg21_1, arg613_1, arg320_1, arg173_1, arg467_1, arg22_1, arg614_1, arg321_1, arg174_1, arg468_1, arg23_1, arg615_1, arg322_1, arg175_1, arg469_1, arg24_1, arg616_1, arg323_1, arg176_1, arg470_1, arg25_1, arg617_1, arg324_1, arg177_1, arg471_1, arg26_1, arg618_1, arg325_1, arg178_1, arg472_1, arg27_1, arg619_1, arg326_1, arg179_1, arg473_1, arg28_1, arg620_1, arg327_1, arg180_1, arg474_1, arg29_1, arg621_1, arg328_1, arg181_1, arg475_1, arg30_1, arg622_1, arg329_1, arg182_1, arg476_1, arg31_1, arg623_1, arg330_1, arg183_1, arg477_1, arg32_1, arg624_1, arg331_1, arg184_1, arg478_1, arg33_1, arg625_1, arg332_1, arg185_1, arg479_1, arg34_1, arg626_1, arg333_1, arg186_1, arg480_1, arg35_1, arg627_1, arg334_1, arg187_1, arg481_1, arg36_1, arg628_1, arg335_1, arg188_1, arg482_1, arg37_1, arg629_1, arg317_1, arg19_1, arg464_1, arg318_1, arg20_1, arg465_1, arg319_1, arg21_1, arg466_1, arg320_1, arg22_1, arg467_1, arg321_1, arg23_1, arg468_1, arg322_1, arg24_1, arg469_1, arg323_1, arg25_1, arg470_1, arg324_1, 
arg26_1, arg471_1, arg325_1, arg27_1, arg472_1, arg326_1, arg28_1, arg473_1, arg327_1, arg29_1, arg474_1, arg328_1, arg30_1, arg475_1, arg329_1, arg31_1, arg476_1, arg330_1, arg32_1, arg477_1, arg331_1, arg33_1, arg478_1, arg332_1, arg34_1, arg479_1, arg333_1, arg35_1, arg480_1, arg334_1, arg36_1, arg481_1, arg335_1, arg37_1, arg482_1, grid=(11539, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_4.run(arg336_1, arg189_1, arg483_1, arg38_1, arg630_1, arg337_1, arg190_1, arg484_1, arg39_1, arg631_1, arg338_1, arg191_1, arg485_1, arg40_1, arg632_1, arg339_1, arg192_1, arg486_1, arg41_1, arg633_1, arg340_1, arg193_1, arg487_1, arg42_1, arg634_1, arg341_1, arg194_1, arg488_1, arg43_1, arg635_1, arg342_1, arg195_1, arg489_1, arg44_1, arg636_1, arg343_1, arg196_1, arg490_1, arg45_1, arg637_1, arg344_1, arg197_1, arg491_1, arg46_1, arg638_1, arg345_1, arg198_1, arg492_1, arg47_1, arg639_1, arg346_1, arg199_1, arg493_1, arg48_1, arg640_1, arg347_1, arg200_1, arg494_1, arg49_1, arg641_1, arg348_1, arg201_1, arg495_1, arg50_1, arg642_1, arg349_1, arg202_1, arg496_1, arg51_1, arg643_1, arg350_1, arg203_1, arg497_1, arg52_1, arg644_1, arg351_1, arg204_1, arg498_1, arg53_1, arg645_1, arg352_1, arg205_1, arg499_1, arg54_1, arg646_1, arg353_1, arg206_1, arg500_1, arg55_1, arg647_1, arg354_1, arg207_1, arg501_1, arg56_1, arg648_1, arg336_1, arg38_1, arg483_1, arg337_1, arg39_1, arg484_1, arg338_1, arg40_1, arg485_1, arg339_1, arg41_1, arg486_1, arg340_1, arg42_1, arg487_1, arg341_1, arg43_1, arg488_1, arg342_1, arg44_1, arg489_1, arg343_1, arg45_1, arg490_1, arg344_1, arg46_1, arg491_1, arg345_1, arg47_1, arg492_1, arg346_1, arg48_1, arg493_1, arg347_1, arg49_1, arg494_1, arg348_1, arg50_1, arg495_1, arg349_1, arg51_1, arg496_1, arg350_1, arg52_1, arg497_1, arg351_1, arg53_1, arg498_1, arg352_1, arg54_1, arg499_1, arg353_1, arg55_1, arg500_1, arg354_1, arg56_1, arg501_1, grid=(9235, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_5.run(arg355_1, arg208_1, arg502_1, arg57_1, arg649_1, arg356_1, arg209_1, arg503_1, arg58_1, arg650_1, arg357_1, arg210_1, arg504_1, arg59_1, arg651_1, arg358_1, arg211_1, arg505_1, arg60_1, arg652_1, arg359_1, arg212_1, arg506_1, arg61_1, arg653_1, arg360_1, arg213_1, arg507_1, arg62_1, arg654_1, arg361_1, arg214_1, arg508_1, arg63_1, arg655_1, arg362_1, arg215_1, arg509_1, arg64_1, arg656_1, arg363_1, arg216_1, arg510_1, arg65_1, arg657_1, arg364_1, arg217_1, arg511_1, arg66_1, arg658_1, arg365_1, arg218_1, arg512_1, arg67_1, arg659_1, arg366_1, arg219_1, arg513_1, arg68_1, arg660_1, arg367_1, arg220_1, arg514_1, arg69_1, arg661_1, arg368_1, arg221_1, arg515_1, arg70_1, arg662_1, arg369_1, arg222_1, arg516_1, arg71_1, arg663_1, arg370_1, arg223_1, arg517_1, arg72_1, arg664_1, arg371_1, arg224_1, arg518_1, arg73_1, arg665_1, arg372_1, arg225_1, arg519_1, arg74_1, arg666_1, arg373_1, arg226_1, arg520_1, arg75_1, arg667_1, arg355_1, arg57_1, arg502_1, arg356_1, arg58_1, arg503_1, arg357_1, arg59_1, arg504_1, arg358_1, arg60_1, arg505_1, arg359_1, arg61_1, arg506_1, arg360_1, arg62_1, arg507_1, arg361_1, arg63_1, arg508_1, arg362_1, arg64_1, arg509_1, arg363_1, arg65_1, arg510_1, arg364_1, arg66_1, arg511_1, arg365_1, arg67_1, arg512_1, arg366_1, arg68_1, arg513_1, arg367_1, arg69_1, arg514_1, arg368_1, arg70_1, arg515_1, arg369_1, arg71_1, arg516_1, arg370_1, arg72_1, arg517_1, arg371_1, arg73_1, arg518_1, arg372_1, arg74_1, arg519_1, arg373_1, arg75_1, arg520_1, grid=(11539, 1, 1), stream=stream0) + # Source 
Nodes: [], Original ATen: [] + triton_for_fused_6.run(arg374_1, arg227_1, arg521_1, arg76_1, arg668_1, arg375_1, arg228_1, arg522_1, arg77_1, arg669_1, arg376_1, arg229_1, arg523_1, arg78_1, arg670_1, arg377_1, arg230_1, arg524_1, arg79_1, arg671_1, arg378_1, arg231_1, arg525_1, arg80_1, arg672_1, arg379_1, arg232_1, arg526_1, arg81_1, arg673_1, arg380_1, arg233_1, arg527_1, arg82_1, arg674_1, arg381_1, arg234_1, arg528_1, arg83_1, arg675_1, arg382_1, arg235_1, arg529_1, arg84_1, arg676_1, arg383_1, arg236_1, arg530_1, arg85_1, arg677_1, arg384_1, arg237_1, arg531_1, arg86_1, arg678_1, arg385_1, arg238_1, arg532_1, arg87_1, arg679_1, arg386_1, arg239_1, arg533_1, arg88_1, arg680_1, arg387_1, arg240_1, arg534_1, arg89_1, arg681_1, arg388_1, arg241_1, arg535_1, arg90_1, arg682_1, arg389_1, arg242_1, arg536_1, arg91_1, arg683_1, arg390_1, arg243_1, arg537_1, arg92_1, arg684_1, arg391_1, arg244_1, arg538_1, arg93_1, arg685_1, arg392_1, arg245_1, arg539_1, arg94_1, arg686_1, arg374_1, arg76_1, arg521_1, arg375_1, arg77_1, arg522_1, arg376_1, arg78_1, arg523_1, arg377_1, arg79_1, arg524_1, arg378_1, arg80_1, arg525_1, arg379_1, arg81_1, arg526_1, arg380_1, arg82_1, arg527_1, arg381_1, arg83_1, arg528_1, arg382_1, arg84_1, arg529_1, arg383_1, arg85_1, arg530_1, arg384_1, arg86_1, arg531_1, arg385_1, arg87_1, arg532_1, arg386_1, arg88_1, arg533_1, arg387_1, arg89_1, arg534_1, arg388_1, arg90_1, arg535_1, arg389_1, arg91_1, arg536_1, arg390_1, arg92_1, arg537_1, arg391_1, arg93_1, arg538_1, arg392_1, arg94_1, arg539_1, grid=(11538, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_7.run(arg393_1, arg246_1, arg540_1, arg95_1, arg687_1, arg394_1, arg247_1, arg541_1, arg96_1, arg688_1, arg395_1, arg248_1, arg542_1, arg97_1, arg689_1, arg396_1, arg249_1, arg543_1, arg98_1, arg690_1, arg397_1, arg250_1, arg544_1, arg99_1, arg691_1, arg398_1, arg251_1, arg545_1, arg100_1, arg692_1, arg399_1, arg252_1, arg546_1, arg101_1, arg693_1, arg400_1, arg253_1, arg547_1, arg102_1, arg694_1, arg401_1, arg254_1, arg548_1, arg103_1, arg695_1, arg402_1, arg255_1, arg549_1, arg104_1, arg696_1, arg403_1, arg256_1, arg550_1, arg105_1, arg697_1, arg404_1, arg257_1, arg551_1, arg106_1, arg698_1, arg405_1, arg258_1, arg552_1, arg107_1, arg699_1, arg406_1, arg259_1, arg553_1, arg108_1, arg700_1, arg407_1, arg260_1, arg554_1, arg109_1, arg701_1, arg408_1, arg261_1, arg555_1, arg110_1, arg702_1, arg409_1, arg262_1, arg556_1, arg111_1, arg703_1, arg410_1, arg263_1, arg557_1, arg112_1, arg704_1, arg411_1, arg264_1, arg558_1, arg113_1, arg705_1, arg393_1, arg95_1, arg540_1, arg394_1, arg96_1, arg541_1, arg395_1, arg97_1, arg542_1, arg396_1, arg98_1, arg543_1, arg397_1, arg99_1, arg544_1, arg398_1, arg100_1, arg545_1, arg399_1, arg101_1, arg546_1, arg400_1, arg102_1, arg547_1, arg401_1, arg103_1, arg548_1, arg402_1, arg104_1, arg549_1, arg403_1, arg105_1, arg550_1, arg404_1, arg106_1, arg551_1, arg405_1, arg107_1, arg552_1, arg406_1, arg108_1, arg553_1, arg407_1, arg109_1, arg554_1, arg408_1, arg110_1, arg555_1, arg409_1, arg111_1, arg556_1, arg410_1, arg112_1, arg557_1, arg411_1, arg113_1, arg558_1, grid=(10965, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_8.run(arg412_1, arg265_1, arg559_1, arg114_1, arg706_1, arg413_1, arg266_1, arg560_1, arg115_1, arg707_1, arg414_1, arg267_1, arg561_1, arg116_1, arg708_1, arg415_1, arg268_1, arg562_1, arg117_1, arg709_1, arg416_1, arg269_1, arg563_1, arg118_1, arg710_1, arg417_1, arg270_1, arg564_1, arg119_1, arg711_1, 
arg418_1, arg271_1, arg565_1, arg120_1, arg712_1, arg419_1, arg272_1, arg566_1, arg121_1, arg713_1, arg420_1, arg273_1, arg567_1, arg122_1, arg714_1, arg421_1, arg274_1, arg568_1, arg123_1, arg715_1, arg422_1, arg275_1, arg569_1, arg124_1, arg716_1, arg423_1, arg276_1, arg570_1, arg125_1, arg717_1, arg424_1, arg277_1, arg571_1, arg126_1, arg718_1, arg425_1, arg278_1, arg572_1, arg127_1, arg719_1, arg426_1, arg279_1, arg573_1, arg128_1, arg720_1, arg427_1, arg280_1, arg574_1, arg129_1, arg721_1, arg428_1, arg281_1, arg575_1, arg130_1, arg722_1, arg429_1, arg282_1, arg576_1, arg131_1, arg723_1, arg430_1, arg283_1, arg577_1, arg132_1, arg724_1, arg412_1, arg114_1, arg559_1, arg413_1, arg115_1, arg560_1, arg414_1, arg116_1, arg561_1, arg415_1, arg117_1, arg562_1, arg416_1, arg118_1, arg563_1, arg417_1, arg119_1, arg564_1, arg418_1, arg120_1, arg565_1, arg419_1, arg121_1, arg566_1, arg420_1, arg122_1, arg567_1, arg421_1, arg123_1, arg568_1, arg422_1, arg124_1, arg569_1, arg423_1, arg125_1, arg570_1, arg424_1, arg126_1, arg571_1, arg425_1, arg127_1, arg572_1, arg426_1, arg128_1, arg573_1, arg427_1, arg129_1, arg574_1, arg428_1, arg130_1, arg575_1, arg429_1, arg131_1, arg576_1, arg430_1, arg132_1, arg577_1, grid=(12114, 1, 1), stream=stream0) + # Source Nodes: [], Original ATen: [] + triton_for_fused_9.run(arg431_1, arg284_1, arg578_1, arg133_1, arg725_1, arg432_1, arg285_1, arg579_1, arg134_1, arg726_1, arg433_1, arg286_1, arg580_1, arg135_1, arg727_1, arg434_1, arg287_1, arg581_1, arg136_1, arg728_1, arg435_1, arg288_1, arg582_1, arg137_1, arg729_1, arg436_1, arg289_1, arg583_1, arg138_1, arg730_1, arg437_1, arg290_1, arg584_1, arg139_1, arg731_1, arg438_1, arg291_1, arg585_1, arg140_1, arg732_1, arg439_1, arg292_1, arg586_1, arg141_1, arg733_1, arg440_1, arg293_1, arg587_1, arg142_1, arg734_1, arg441_1, arg294_1, arg588_1, arg143_1, arg735_1, arg442_1, arg295_1, arg589_1, arg144_1, arg736_1, arg443_1, arg296_1, arg590_1, arg145_1, arg737_1, arg444_1, arg297_1, arg591_1, arg146_1, arg738_1, arg445_1, arg298_1, arg592_1, arg147_1, arg739_1, arg431_1, arg133_1, arg578_1, arg432_1, arg134_1, arg579_1, arg433_1, arg135_1, arg580_1, arg434_1, arg136_1, arg581_1, arg435_1, arg137_1, arg582_1, arg436_1, arg138_1, arg583_1, arg437_1, arg139_1, arg584_1, arg438_1, arg140_1, arg585_1, arg439_1, arg141_1, arg586_1, arg440_1, arg142_1, arg587_1, arg441_1, arg143_1, arg588_1, arg442_1, arg144_1, arg589_1, arg443_1, arg145_1, arg590_1, arg444_1, arg146_1, arg591_1, arg445_1, arg147_1, arg592_1, grid=(6927, 1, 1), stream=stream0) + del arg0_1 + del arg100_1 + del arg101_1 + del arg102_1 + del arg103_1 + del arg104_1 + del arg105_1 + del arg106_1 + del arg107_1 + del arg108_1 + del arg109_1 + del arg10_1 + del arg110_1 + del arg111_1 + del arg112_1 + del arg113_1 + del arg114_1 + del arg115_1 + del arg116_1 + del arg117_1 + del arg118_1 + del arg119_1 + del arg11_1 + del arg120_1 + del arg121_1 + del arg122_1 + del arg123_1 + del arg124_1 + del arg125_1 + del arg126_1 + del arg127_1 + del arg128_1 + del arg129_1 + del arg12_1 + del arg130_1 + del arg131_1 + del arg132_1 + del arg133_1 + del arg134_1 + del arg135_1 + del arg136_1 + del arg137_1 + del arg138_1 + del arg139_1 + del arg13_1 + del arg140_1 + del arg141_1 + del arg142_1 + del arg143_1 + del arg144_1 + del arg145_1 + del arg146_1 + del arg147_1 + del arg148_1 + del arg149_1 + del arg14_1 + del arg150_1 + del arg151_1 + del arg152_1 + del arg153_1 + del arg154_1 + del arg155_1 + del arg156_1 + del arg157_1 + del arg158_1 + del arg159_1 + del arg15_1 + 
del arg160_1 + del arg161_1 + del arg162_1 + del arg163_1 + del arg164_1 + del arg165_1 + del arg166_1 + del arg167_1 + del arg168_1 + del arg169_1 + del arg16_1 + del arg170_1 + del arg171_1 + del arg172_1 + del arg173_1 + del arg174_1 + del arg175_1 + del arg176_1 + del arg177_1 + del arg178_1 + del arg179_1 + del arg17_1 + del arg180_1 + del arg181_1 + del arg182_1 + del arg183_1 + del arg184_1 + del arg185_1 + del arg186_1 + del arg187_1 + del arg188_1 + del arg189_1 + del arg18_1 + del arg190_1 + del arg191_1 + del arg192_1 + del arg193_1 + del arg194_1 + del arg195_1 + del arg196_1 + del arg197_1 + del arg198_1 + del arg199_1 + del arg19_1 + del arg1_1 + del arg200_1 + del arg201_1 + del arg202_1 + del arg203_1 + del arg204_1 + del arg205_1 + del arg206_1 + del arg207_1 + del arg208_1 + del arg209_1 + del arg20_1 + del arg210_1 + del arg211_1 + del arg212_1 + del arg213_1 + del arg214_1 + del arg215_1 + del arg216_1 + del arg217_1 + del arg218_1 + del arg219_1 + del arg21_1 + del arg220_1 + del arg221_1 + del arg222_1 + del arg223_1 + del arg224_1 + del arg225_1 + del arg226_1 + del arg227_1 + del arg228_1 + del arg229_1 + del arg22_1 + del arg230_1 + del arg231_1 + del arg232_1 + del arg233_1 + del arg234_1 + del arg235_1 + del arg236_1 + del arg237_1 + del arg238_1 + del arg239_1 + del arg23_1 + del arg240_1 + del arg241_1 + del arg242_1 + del arg243_1 + del arg244_1 + del arg245_1 + del arg246_1 + del arg247_1 + del arg248_1 + del arg249_1 + del arg24_1 + del arg250_1 + del arg251_1 + del arg252_1 + del arg253_1 + del arg254_1 + del arg255_1 + del arg256_1 + del arg257_1 + del arg258_1 + del arg259_1 + del arg25_1 + del arg260_1 + del arg261_1 + del arg262_1 + del arg263_1 + del arg264_1 + del arg265_1 + del arg266_1 + del arg267_1 + del arg268_1 + del arg269_1 + del arg26_1 + del arg270_1 + del arg271_1 + del arg272_1 + del arg273_1 + del arg274_1 + del arg275_1 + del arg276_1 + del arg277_1 + del arg278_1 + del arg279_1 + del arg27_1 + del arg280_1 + del arg281_1 + del arg282_1 + del arg283_1 + del arg284_1 + del arg285_1 + del arg286_1 + del arg287_1 + del arg288_1 + del arg289_1 + del arg28_1 + del arg290_1 + del arg291_1 + del arg292_1 + del arg293_1 + del arg294_1 + del arg295_1 + del arg296_1 + del arg297_1 + del arg298_1 + del arg299_1 + del arg29_1 + del arg2_1 + del arg300_1 + del arg301_1 + del arg302_1 + del arg303_1 + del arg304_1 + del arg305_1 + del arg306_1 + del arg307_1 + del arg308_1 + del arg309_1 + del arg30_1 + del arg310_1 + del arg311_1 + del arg312_1 + del arg313_1 + del arg314_1 + del arg315_1 + del arg316_1 + del arg317_1 + del arg318_1 + del arg319_1 + del arg31_1 + del arg320_1 + del arg321_1 + del arg322_1 + del arg323_1 + del arg324_1 + del arg325_1 + del arg326_1 + del arg327_1 + del arg328_1 + del arg329_1 + del arg32_1 + del arg330_1 + del arg331_1 + del arg332_1 + del arg333_1 + del arg334_1 + del arg335_1 + del arg336_1 + del arg337_1 + del arg338_1 + del arg339_1 + del arg33_1 + del arg340_1 + del arg341_1 + del arg342_1 + del arg343_1 + del arg344_1 + del arg345_1 + del arg346_1 + del arg347_1 + del arg348_1 + del arg349_1 + del arg34_1 + del arg350_1 + del arg351_1 + del arg352_1 + del arg353_1 + del arg354_1 + del arg355_1 + del arg356_1 + del arg357_1 + del arg358_1 + del arg359_1 + del arg35_1 + del arg360_1 + del arg361_1 + del arg362_1 + del arg363_1 + del arg364_1 + del arg365_1 + del arg366_1 + del arg367_1 + del arg368_1 + del arg369_1 + del arg36_1 + del arg370_1 + del arg371_1 + del arg372_1 + del arg373_1 + del arg374_1 + del 
arg375_1
+        del arg376_1
+        del arg377_1
+        del arg378_1
+        del arg379_1
+        del arg37_1
+        del arg380_1
[... the remaining per-buffer del statements, arg381_1 through arg97_1 in lexicographic order ...]
+        del arg98_1
+        del arg99_1
+        del arg9_1
+    return ()
+
+
+def benchmark_compiled_module(times=10, repeat=10):
+    from torch._dynamo.testing import rand_strided
+    from torch._inductor.utils import print_performance
+    arg0_1 = rand_strided((50304, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg1_1 = rand_strided((1024, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg2_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg3_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg4_1 = rand_strided((2304, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg5_1 = rand_strided((2304, ), (1, ), device='cuda:0', dtype=torch.float32)
+    arg6_1 = rand_strided((768, 768), (768, 1), device='cuda:0', dtype=torch.float32)
+    arg7_1 = rand_strided((768, ), (1, ), device='cuda:0', dtype=torch.float32)
[... rand_strided initializers for arg8_1 through arg592_1, all device='cuda:0' float32 tensors, with per-block shapes cycling through (768, ), (2304, 768), (2304, ), (768, 768), (3072, 768), (3072, ), and (768, 3072), plus (50304, 768) tables at arg151_1, arg299_1, and arg446_1, (1024, 768) buffers at arg149_1, arg150_1, and arg152_1, and a scalar () at arg148_1 ...]
+    arg593_1 = rand_strided((), (), device='cuda:0', dtype=torch.float32)
[... identical scalar () initializers for arg594_1 through arg738_1 ...]
+    arg739_1 = rand_strided((), (), device='cuda:0', dtype=torch.float32)
+    fn = lambda: call([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, 
arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, 
arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1]) + return print_performance(fn, times=times, repeat=repeat) + + + if __name__ == "__main__": + from torch._inductor.wrapper_benchmark import compiled_module_main + compiled_module_main('nanogpt', benchmark_compiled_module) + +V0806 13:56:22.230000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "9712c78834c2d72b350fa84c50d70770"} + { + "name": "code_gen", + "ts": 1722977782230623.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.230000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "abaf421eb449f454f1dd958665f461eb"} + { + "name": "GraphLowering.compile_to_module", + "ts": 1722977782230854.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.384000 4107173 torch/_dynamo/utils.py:838] {"chromium_event": {}, "has_payload": "5e958fee1956dad5abf3d91b2bc0b309"} + { + "name": "fx_graph_cache_miss", + "ts": 1722977770798463.8, + "args": { + "key": "f2hzi4mmzauwdbyib6zmykorraxjbqftyvglo6f4mz2b36wljiti", + "cache_state": "miss", + "components": [ + "[i5hietdxt6dlkcrwbpsvei6udef6z3eec54zo7cpjzbybmgvi7b] gm: ()\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, 
arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, 
arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, 
arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1):\n _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1)\n getitem = _foreach_add[0]\n getitem_1 = _foreach_add[1]\n getitem_2 = _foreach_add[2]\n getitem_3 = _foreach_add[3]\n getitem_4 = _foreach_add[4]\n getitem_5 = _foreach_add[5]\n getitem_6 = _foreach_add[6]\n getitem_7 = _foreach_add[7]\n getitem_8 = _foreach_add[8]\n getitem_9 = _foreach_add[9]\n getitem_10 = _foreach_add[10]\n getitem_11 = _foreach_add[11]\n getitem_12 = _foreach_add[12]\n getitem_13 = _foreach_add[13]\n getitem_14 = _foreach_add[14]\n getitem_15 = _foreach_add[15]\n getitem_16 = _foreach_add[16]\n getitem_17 = _foreach_add[17]\n getitem_18 = _foreach_add[18]\n getitem_19 = _foreach_add[19]\n getitem_20 = _foreach_add[20]\n getitem_21 = _foreach_add[21]\n getitem_22 = _foreach_add[22]\n getitem_23 = _foreach_add[23]\n getitem_24 = _foreach_add[24]\n getitem_25 = _foreach_add[25]\n getitem_26 = _foreach_add[26]\n getitem_27 = _foreach_add[27]\n getitem_28 = _foreach_add[28]\n getitem_29 = _foreach_add[29]\n getitem_30 = _foreach_add[30]\n getitem_31 = _foreach_add[31]\n getitem_32 = _foreach_add[32]\n getitem_33 = _foreach_add[33]\n getitem_34 = _foreach_add[34]\n getitem_35 = _foreach_add[35]\n getitem_36 = _foreach_add[36]\n getitem_37 = _foreach_add[37]\n getitem_38 = _foreach_add[38]\n getitem_39 = _foreach_add[39]\n getitem_40 = _foreach_add[40]\n getitem_41 = _foreach_add[41]\n getitem_42 = _foreach_add[42]\n getitem_43 = _foreach_add[43]\n getitem_44 = _foreach_add[44]\n getitem_45 = _foreach_add[45]\n getitem_46 = _foreach_add[46]\n getitem_47 = _foreach_add[47]\n getitem_48 = _foreach_add[48]\n getitem_49 = _foreach_add[49]\n getitem_50 = _foreach_add[50]\n getitem_51 = _foreach_add[51]\n getitem_52 = _foreach_add[52]\n getitem_53 = _foreach_add[53]\n getitem_54 = _foreach_add[54]\n getitem_55 = _foreach_add[55]\n getitem_56 = _foreach_add[56]\n getitem_57 = _foreach_add[57]\n getitem_58 = _foreach_add[58]\n getitem_59 = 
_foreach_add[59]\n getitem_60 = _foreach_add[60]\n getitem_61 = _foreach_add[61]\n getitem_62 = _foreach_add[62]\n getitem_63 = _foreach_add[63]\n getitem_64 = _foreach_add[64]\n getitem_65 = _foreach_add[65]\n getitem_66 = _foreach_add[66]\n getitem_67 = _foreach_add[67]\n getitem_68 = _foreach_add[68]\n getitem_69 = _foreach_add[69]\n getitem_70 = _foreach_add[70]\n getitem_71 = _foreach_add[71]\n getitem_72 = _foreach_add[72]\n getitem_73 = _foreach_add[73]\n getitem_74 = _foreach_add[74]\n getitem_75 = _foreach_add[75]\n getitem_76 = _foreach_add[76]\n getitem_77 = _foreach_add[77]\n getitem_78 = _foreach_add[78]\n getitem_79 = _foreach_add[79]\n getitem_80 = _foreach_add[80]\n getitem_81 = _foreach_add[81]\n getitem_82 = _foreach_add[82]\n getitem_83 = _foreach_add[83]\n getitem_84 = _foreach_add[84]\n getitem_85 = _foreach_add[85]\n getitem_86 = _foreach_add[86]\n getitem_87 = _foreach_add[87]\n getitem_88 = _foreach_add[88]\n getitem_89 = _foreach_add[89]\n getitem_90 = _foreach_add[90]\n getitem_91 = _foreach_add[91]\n getitem_92 = _foreach_add[92]\n getitem_93 = _foreach_add[93]\n getitem_94 = _foreach_add[94]\n getitem_95 = _foreach_add[95]\n getitem_96 = _foreach_add[96]\n getitem_97 = _foreach_add[97]\n getitem_98 = _foreach_add[98]\n getitem_99 = _foreach_add[99]\n getitem_100 = _foreach_add[100]\n getitem_101 = _foreach_add[101]\n getitem_102 = _foreach_add[102]\n getitem_103 = _foreach_add[103]\n getitem_104 = _foreach_add[104]\n getitem_105 = _foreach_add[105]\n getitem_106 = _foreach_add[106]\n getitem_107 = _foreach_add[107]\n getitem_108 = _foreach_add[108]\n getitem_109 = _foreach_add[109]\n getitem_110 = _foreach_add[110]\n getitem_111 = _foreach_add[111]\n getitem_112 = _foreach_add[112]\n getitem_113 = _foreach_add[113]\n getitem_114 = _foreach_add[114]\n getitem_115 = _foreach_add[115]\n getitem_116 = _foreach_add[116]\n getitem_117 = _foreach_add[117]\n getitem_118 = _foreach_add[118]\n getitem_119 = _foreach_add[119]\n getitem_120 = _foreach_add[120]\n getitem_121 = _foreach_add[121]\n getitem_122 = _foreach_add[122]\n getitem_123 = _foreach_add[123]\n getitem_124 = _foreach_add[124]\n getitem_125 = _foreach_add[125]\n getitem_126 = _foreach_add[126]\n getitem_127 = _foreach_add[127]\n getitem_128 = _foreach_add[128]\n getitem_129 = _foreach_add[129]\n getitem_130 = _foreach_add[130]\n getitem_131 = _foreach_add[131]\n getitem_132 = _foreach_add[132]\n getitem_133 = _foreach_add[133]\n getitem_134 = _foreach_add[134]\n getitem_135 = _foreach_add[135]\n getitem_136 = _foreach_add[136]\n getitem_137 = _foreach_add[137]\n getitem_138 = _foreach_add[138]\n getitem_139 = _foreach_add[139]\n getitem_140 = _foreach_add[140]\n getitem_141 = _foreach_add[141]\n getitem_142 = _foreach_add[142]\n getitem_143 = _foreach_add[143]\n getitem_144 = _foreach_add[144]\n getitem_145 = _foreach_add[145]\n getitem_146 = _foreach_add[146]\n getitem_147 = _foreach_add[147]; _foreach_add = None\n _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, 
arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1])\n getitem_148 = _foreach_sub[0]\n getitem_149 = _foreach_sub[1]\n getitem_150 = _foreach_sub[2]\n getitem_151 = _foreach_sub[3]\n getitem_152 = _foreach_sub[4]\n getitem_153 = _foreach_sub[5]\n getitem_154 = _foreach_sub[6]\n getitem_155 = _foreach_sub[7]\n getitem_156 = _foreach_sub[8]\n getitem_157 = _foreach_sub[9]\n getitem_158 = _foreach_sub[10]\n getitem_159 = _foreach_sub[11]\n getitem_160 = _foreach_sub[12]\n getitem_161 = _foreach_sub[13]\n getitem_162 = _foreach_sub[14]\n getitem_163 = _foreach_sub[15]\n getitem_164 = _foreach_sub[16]\n getitem_165 = _foreach_sub[17]\n getitem_166 = _foreach_sub[18]\n getitem_167 = _foreach_sub[19]\n getitem_168 = _foreach_sub[20]\n getitem_169 = _foreach_sub[21]\n getitem_170 = _foreach_sub[22]\n getitem_171 = _foreach_sub[23]\n getitem_172 = _foreach_sub[24]\n getitem_173 = _foreach_sub[25]\n getitem_174 = _foreach_sub[26]\n getitem_175 = _foreach_sub[27]\n getitem_176 = _foreach_sub[28]\n getitem_177 = _foreach_sub[29]\n getitem_178 = _foreach_sub[30]\n getitem_179 = _foreach_sub[31]\n getitem_180 = _foreach_sub[32]\n getitem_181 = _foreach_sub[33]\n getitem_182 = 
_foreach_sub[34]\n getitem_183 = _foreach_sub[35]\n getitem_184 = _foreach_sub[36]\n getitem_185 = _foreach_sub[37]\n getitem_186 = _foreach_sub[38]\n getitem_187 = _foreach_sub[39]\n getitem_188 = _foreach_sub[40]\n getitem_189 = _foreach_sub[41]\n getitem_190 = _foreach_sub[42]\n getitem_191 = _foreach_sub[43]\n getitem_192 = _foreach_sub[44]\n getitem_193 = _foreach_sub[45]\n getitem_194 = _foreach_sub[46]\n getitem_195 = _foreach_sub[47]\n getitem_196 = _foreach_sub[48]\n getitem_197 = _foreach_sub[49]\n getitem_198 = _foreach_sub[50]\n getitem_199 = _foreach_sub[51]\n getitem_200 = _foreach_sub[52]\n getitem_201 = _foreach_sub[53]\n getitem_202 = _foreach_sub[54]\n getitem_203 = _foreach_sub[55]\n getitem_204 = _foreach_sub[56]\n getitem_205 = _foreach_sub[57]\n getitem_206 = _foreach_sub[58]\n getitem_207 = _foreach_sub[59]\n getitem_208 = _foreach_sub[60]\n getitem_209 = _foreach_sub[61]\n getitem_210 = _foreach_sub[62]\n getitem_211 = _foreach_sub[63]\n getitem_212 = _foreach_sub[64]\n getitem_213 = _foreach_sub[65]\n getitem_214 = _foreach_sub[66]\n getitem_215 = _foreach_sub[67]\n getitem_216 = _foreach_sub[68]\n getitem_217 = _foreach_sub[69]\n getitem_218 = _foreach_sub[70]\n getitem_219 = _foreach_sub[71]\n getitem_220 = _foreach_sub[72]\n getitem_221 = _foreach_sub[73]\n getitem_222 = _foreach_sub[74]\n getitem_223 = _foreach_sub[75]\n getitem_224 = _foreach_sub[76]\n getitem_225 = _foreach_sub[77]\n getitem_226 = _foreach_sub[78]\n getitem_227 = _foreach_sub[79]\n getitem_228 = _foreach_sub[80]\n getitem_229 = _foreach_sub[81]\n getitem_230 = _foreach_sub[82]\n getitem_231 = _foreach_sub[83]\n getitem_232 = _foreach_sub[84]\n getitem_233 = _foreach_sub[85]\n getitem_234 = _foreach_sub[86]\n getitem_235 = _foreach_sub[87]\n getitem_236 = _foreach_sub[88]\n getitem_237 = _foreach_sub[89]\n getitem_238 = _foreach_sub[90]\n getitem_239 = _foreach_sub[91]\n getitem_240 = _foreach_sub[92]\n getitem_241 = _foreach_sub[93]\n getitem_242 = _foreach_sub[94]\n getitem_243 = _foreach_sub[95]\n getitem_244 = _foreach_sub[96]\n getitem_245 = _foreach_sub[97]\n getitem_246 = _foreach_sub[98]\n getitem_247 = _foreach_sub[99]\n getitem_248 = _foreach_sub[100]\n getitem_249 = _foreach_sub[101]\n getitem_250 = _foreach_sub[102]\n getitem_251 = _foreach_sub[103]\n getitem_252 = _foreach_sub[104]\n getitem_253 = _foreach_sub[105]\n getitem_254 = _foreach_sub[106]\n getitem_255 = _foreach_sub[107]\n getitem_256 = _foreach_sub[108]\n getitem_257 = _foreach_sub[109]\n getitem_258 = _foreach_sub[110]\n getitem_259 = _foreach_sub[111]\n getitem_260 = _foreach_sub[112]\n getitem_261 = _foreach_sub[113]\n getitem_262 = _foreach_sub[114]\n getitem_263 = _foreach_sub[115]\n getitem_264 = _foreach_sub[116]\n getitem_265 = _foreach_sub[117]\n getitem_266 = _foreach_sub[118]\n getitem_267 = _foreach_sub[119]\n getitem_268 = _foreach_sub[120]\n getitem_269 = _foreach_sub[121]\n getitem_270 = _foreach_sub[122]\n getitem_271 = _foreach_sub[123]\n getitem_272 = _foreach_sub[124]\n getitem_273 = _foreach_sub[125]\n getitem_274 = _foreach_sub[126]\n getitem_275 = _foreach_sub[127]\n getitem_276 = _foreach_sub[128]\n getitem_277 = _foreach_sub[129]\n getitem_278 = _foreach_sub[130]\n getitem_279 = _foreach_sub[131]\n getitem_280 = _foreach_sub[132]\n getitem_281 = _foreach_sub[133]\n getitem_282 = _foreach_sub[134]\n getitem_283 = _foreach_sub[135]\n getitem_284 = _foreach_sub[136]\n getitem_285 = _foreach_sub[137]\n getitem_286 = _foreach_sub[138]\n getitem_287 = _foreach_sub[139]\n getitem_288 = 
_foreach_sub[140]\n getitem_289 = _foreach_sub[141]\n getitem_290 = _foreach_sub[142]\n getitem_291 = _foreach_sub[143]\n getitem_292 = _foreach_sub[144]\n getitem_293 = _foreach_sub[145]\n getitem_294 = _foreach_sub[146]\n getitem_295 = _foreach_sub[147]; _foreach_sub = None\n _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = 
getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None\n getitem_296 = _foreach_mul[0]\n getitem_297 = _foreach_mul[1]\n getitem_298 = _foreach_mul[2]\n getitem_299 = _foreach_mul[3]\n getitem_300 = _foreach_mul[4]\n getitem_301 = _foreach_mul[5]\n getitem_302 = _foreach_mul[6]\n getitem_303 = _foreach_mul[7]\n getitem_304 = _foreach_mul[8]\n getitem_305 = _foreach_mul[9]\n getitem_306 = _foreach_mul[10]\n getitem_307 = _foreach_mul[11]\n getitem_308 = _foreach_mul[12]\n getitem_309 = _foreach_mul[13]\n getitem_310 = _foreach_mul[14]\n getitem_311 = _foreach_mul[15]\n getitem_312 = _foreach_mul[16]\n getitem_313 = _foreach_mul[17]\n getitem_314 = _foreach_mul[18]\n getitem_315 = _foreach_mul[19]\n getitem_316 = _foreach_mul[20]\n getitem_317 = _foreach_mul[21]\n getitem_318 = _foreach_mul[22]\n getitem_319 = _foreach_mul[23]\n getitem_320 = _foreach_mul[24]\n getitem_321 = _foreach_mul[25]\n getitem_322 = _foreach_mul[26]\n getitem_323 = _foreach_mul[27]\n getitem_324 = _foreach_mul[28]\n getitem_325 = _foreach_mul[29]\n getitem_326 = _foreach_mul[30]\n getitem_327 = _foreach_mul[31]\n getitem_328 = _foreach_mul[32]\n getitem_329 = _foreach_mul[33]\n getitem_330 = _foreach_mul[34]\n getitem_331 = _foreach_mul[35]\n getitem_332 = _foreach_mul[36]\n getitem_333 = _foreach_mul[37]\n getitem_334 = _foreach_mul[38]\n getitem_335 = _foreach_mul[39]\n getitem_336 = _foreach_mul[40]\n getitem_337 = _foreach_mul[41]\n getitem_338 = _foreach_mul[42]\n getitem_339 = _foreach_mul[43]\n getitem_340 = _foreach_mul[44]\n getitem_341 = _foreach_mul[45]\n getitem_342 = _foreach_mul[46]\n getitem_343 = _foreach_mul[47]\n getitem_344 = _foreach_mul[48]\n getitem_345 = _foreach_mul[49]\n getitem_346 = _foreach_mul[50]\n getitem_347 = _foreach_mul[51]\n getitem_348 = _foreach_mul[52]\n getitem_349 = _foreach_mul[53]\n getitem_350 = _foreach_mul[54]\n getitem_351 = _foreach_mul[55]\n getitem_352 = _foreach_mul[56]\n getitem_353 = _foreach_mul[57]\n getitem_354 = _foreach_mul[58]\n getitem_355 = _foreach_mul[59]\n getitem_356 = _foreach_mul[60]\n getitem_357 = _foreach_mul[61]\n getitem_358 = _foreach_mul[62]\n getitem_359 = _foreach_mul[63]\n getitem_360 = _foreach_mul[64]\n getitem_361 = _foreach_mul[65]\n getitem_362 = _foreach_mul[66]\n getitem_363 = _foreach_mul[67]\n getitem_364 = _foreach_mul[68]\n getitem_365 = _foreach_mul[69]\n getitem_366 = _foreach_mul[70]\n getitem_367 = _foreach_mul[71]\n getitem_368 = _foreach_mul[72]\n getitem_369 = _foreach_mul[73]\n getitem_370 = _foreach_mul[74]\n getitem_371 = _foreach_mul[75]\n getitem_372 = _foreach_mul[76]\n getitem_373 = _foreach_mul[77]\n getitem_374 = _foreach_mul[78]\n getitem_375 = _foreach_mul[79]\n getitem_376 = _foreach_mul[80]\n getitem_377 = _foreach_mul[81]\n getitem_378 = _foreach_mul[82]\n getitem_379 = 
_foreach_mul[83]\n getitem_380 = _foreach_mul[84]\n getitem_381 = _foreach_mul[85]\n getitem_382 = _foreach_mul[86]\n getitem_383 = _foreach_mul[87]\n getitem_384 = _foreach_mul[88]\n getitem_385 = _foreach_mul[89]\n getitem_386 = _foreach_mul[90]\n getitem_387 = _foreach_mul[91]\n getitem_388 = _foreach_mul[92]\n getitem_389 = _foreach_mul[93]\n getitem_390 = _foreach_mul[94]\n getitem_391 = _foreach_mul[95]\n getitem_392 = _foreach_mul[96]\n getitem_393 = _foreach_mul[97]\n getitem_394 = _foreach_mul[98]\n getitem_395 = _foreach_mul[99]\n getitem_396 = _foreach_mul[100]\n getitem_397 = _foreach_mul[101]\n getitem_398 = _foreach_mul[102]\n getitem_399 = _foreach_mul[103]\n getitem_400 = _foreach_mul[104]\n getitem_401 = _foreach_mul[105]\n getitem_402 = _foreach_mul[106]\n getitem_403 = _foreach_mul[107]\n getitem_404 = _foreach_mul[108]\n getitem_405 = _foreach_mul[109]\n getitem_406 = _foreach_mul[110]\n getitem_407 = _foreach_mul[111]\n getitem_408 = _foreach_mul[112]\n getitem_409 = _foreach_mul[113]\n getitem_410 = _foreach_mul[114]\n getitem_411 = _foreach_mul[115]\n getitem_412 = _foreach_mul[116]\n getitem_413 = _foreach_mul[117]\n getitem_414 = _foreach_mul[118]\n getitem_415 = _foreach_mul[119]\n getitem_416 = _foreach_mul[120]\n getitem_417 = _foreach_mul[121]\n getitem_418 = _foreach_mul[122]\n getitem_419 = _foreach_mul[123]\n getitem_420 = _foreach_mul[124]\n getitem_421 = _foreach_mul[125]\n getitem_422 = _foreach_mul[126]\n getitem_423 = _foreach_mul[127]\n getitem_424 = _foreach_mul[128]\n getitem_425 = _foreach_mul[129]\n getitem_426 = _foreach_mul[130]\n getitem_427 = _foreach_mul[131]\n getitem_428 = _foreach_mul[132]\n getitem_429 = _foreach_mul[133]\n getitem_430 = _foreach_mul[134]\n getitem_431 = _foreach_mul[135]\n getitem_432 = _foreach_mul[136]\n getitem_433 = _foreach_mul[137]\n getitem_434 = _foreach_mul[138]\n getitem_435 = _foreach_mul[139]\n getitem_436 = _foreach_mul[140]\n getitem_437 = _foreach_mul[141]\n getitem_438 = _foreach_mul[142]\n getitem_439 = _foreach_mul[143]\n getitem_440 = _foreach_mul[144]\n getitem_441 = _foreach_mul[145]\n getitem_442 = _foreach_mul[146]\n getitem_443 = _foreach_mul[147]; _foreach_mul = None\n _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, 
arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = 
getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None\n getitem_444 = _foreach_add_1[0]\n getitem_445 = _foreach_add_1[1]\n getitem_446 = _foreach_add_1[2]\n getitem_447 = _foreach_add_1[3]\n getitem_448 = _foreach_add_1[4]\n getitem_449 = _foreach_add_1[5]\n getitem_450 = _foreach_add_1[6]\n getitem_451 = _foreach_add_1[7]\n getitem_452 = _foreach_add_1[8]\n getitem_453 = _foreach_add_1[9]\n getitem_454 = _foreach_add_1[10]\n getitem_455 = _foreach_add_1[11]\n getitem_456 = _foreach_add_1[12]\n getitem_457 = _foreach_add_1[13]\n getitem_458 = _foreach_add_1[14]\n getitem_459 = _foreach_add_1[15]\n getitem_460 = _foreach_add_1[16]\n getitem_461 = _foreach_add_1[17]\n getitem_462 = _foreach_add_1[18]\n getitem_463 = _foreach_add_1[19]\n getitem_464 = _foreach_add_1[20]\n getitem_465 = _foreach_add_1[21]\n getitem_466 = _foreach_add_1[22]\n getitem_467 = _foreach_add_1[23]\n getitem_468 = _foreach_add_1[24]\n getitem_469 = _foreach_add_1[25]\n getitem_470 = _foreach_add_1[26]\n getitem_471 = _foreach_add_1[27]\n getitem_472 = _foreach_add_1[28]\n getitem_473 = _foreach_add_1[29]\n getitem_474 = _foreach_add_1[30]\n getitem_475 = _foreach_add_1[31]\n getitem_476 = _foreach_add_1[32]\n getitem_477 = _foreach_add_1[33]\n getitem_478 = _foreach_add_1[34]\n getitem_479 = _foreach_add_1[35]\n getitem_480 = _foreach_add_1[36]\n getitem_481 = _foreach_add_1[37]\n getitem_482 = _foreach_add_1[38]\n getitem_483 = _foreach_add_1[39]\n getitem_484 = _foreach_add_1[40]\n getitem_485 = _foreach_add_1[41]\n getitem_486 = _foreach_add_1[42]\n getitem_487 = _foreach_add_1[43]\n getitem_488 = _foreach_add_1[44]\n getitem_489 = _foreach_add_1[45]\n getitem_490 = _foreach_add_1[46]\n getitem_491 = _foreach_add_1[47]\n getitem_492 = _foreach_add_1[48]\n getitem_493 = _foreach_add_1[49]\n getitem_494 = _foreach_add_1[50]\n getitem_495 = _foreach_add_1[51]\n getitem_496 = _foreach_add_1[52]\n getitem_497 = _foreach_add_1[53]\n getitem_498 = _foreach_add_1[54]\n getitem_499 = _foreach_add_1[55]\n getitem_500 = _foreach_add_1[56]\n getitem_501 = _foreach_add_1[57]\n getitem_502 = _foreach_add_1[58]\n getitem_503 = _foreach_add_1[59]\n getitem_504 = _foreach_add_1[60]\n getitem_505 = _foreach_add_1[61]\n getitem_506 = _foreach_add_1[62]\n getitem_507 = _foreach_add_1[63]\n getitem_508 = _foreach_add_1[64]\n getitem_509 = _foreach_add_1[65]\n getitem_510 = _foreach_add_1[66]\n getitem_511 = _foreach_add_1[67]\n getitem_512 = _foreach_add_1[68]\n getitem_513 = _foreach_add_1[69]\n getitem_514 = _foreach_add_1[70]\n getitem_515 = _foreach_add_1[71]\n getitem_516 = _foreach_add_1[72]\n getitem_517 = _foreach_add_1[73]\n getitem_518 = _foreach_add_1[74]\n getitem_519 = _foreach_add_1[75]\n getitem_520 = _foreach_add_1[76]\n getitem_521 = _foreach_add_1[77]\n getitem_522 = _foreach_add_1[78]\n getitem_523 = _foreach_add_1[79]\n getitem_524 = _foreach_add_1[80]\n getitem_525 = _foreach_add_1[81]\n getitem_526 = _foreach_add_1[82]\n getitem_527 = 
_foreach_add_1[83]\n getitem_528 = _foreach_add_1[84]\n getitem_529 = _foreach_add_1[85]\n getitem_530 = _foreach_add_1[86]\n getitem_531 = _foreach_add_1[87]\n getitem_532 = _foreach_add_1[88]\n getitem_533 = _foreach_add_1[89]\n getitem_534 = _foreach_add_1[90]\n getitem_535 = _foreach_add_1[91]\n getitem_536 = _foreach_add_1[92]\n getitem_537 = _foreach_add_1[93]\n getitem_538 = _foreach_add_1[94]\n getitem_539 = _foreach_add_1[95]\n getitem_540 = _foreach_add_1[96]\n getitem_541 = _foreach_add_1[97]\n getitem_542 = _foreach_add_1[98]\n getitem_543 = _foreach_add_1[99]\n getitem_544 = _foreach_add_1[100]\n getitem_545 = _foreach_add_1[101]\n getitem_546 = _foreach_add_1[102]\n getitem_547 = _foreach_add_1[103]\n getitem_548 = _foreach_add_1[104]\n getitem_549 = _foreach_add_1[105]\n getitem_550 = _foreach_add_1[106]\n getitem_551 = _foreach_add_1[107]\n getitem_552 = _foreach_add_1[108]\n getitem_553 = _foreach_add_1[109]\n getitem_554 = _foreach_add_1[110]\n getitem_555 = _foreach_add_1[111]\n getitem_556 = _foreach_add_1[112]\n getitem_557 = _foreach_add_1[113]\n getitem_558 = _foreach_add_1[114]\n getitem_559 = _foreach_add_1[115]\n getitem_560 = _foreach_add_1[116]\n getitem_561 = _foreach_add_1[117]\n getitem_562 = _foreach_add_1[118]\n getitem_563 = _foreach_add_1[119]\n getitem_564 = _foreach_add_1[120]\n getitem_565 = _foreach_add_1[121]\n getitem_566 = _foreach_add_1[122]\n getitem_567 = _foreach_add_1[123]\n getitem_568 = _foreach_add_1[124]\n getitem_569 = _foreach_add_1[125]\n getitem_570 = _foreach_add_1[126]\n getitem_571 = _foreach_add_1[127]\n getitem_572 = _foreach_add_1[128]\n getitem_573 = _foreach_add_1[129]\n getitem_574 = _foreach_add_1[130]\n getitem_575 = _foreach_add_1[131]\n getitem_576 = _foreach_add_1[132]\n getitem_577 = _foreach_add_1[133]\n getitem_578 = _foreach_add_1[134]\n getitem_579 = _foreach_add_1[135]\n getitem_580 = _foreach_add_1[136]\n getitem_581 = _foreach_add_1[137]\n getitem_582 = _foreach_add_1[138]\n getitem_583 = _foreach_add_1[139]\n getitem_584 = _foreach_add_1[140]\n getitem_585 = _foreach_add_1[141]\n getitem_586 = _foreach_add_1[142]\n getitem_587 = _foreach_add_1[143]\n getitem_588 = _foreach_add_1[144]\n getitem_589 = _foreach_add_1[145]\n getitem_590 = _foreach_add_1[146]\n getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None\n _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, 
arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999)\n getitem_592 = _foreach_mul_1[0]\n getitem_593 = _foreach_mul_1[1]\n getitem_594 = _foreach_mul_1[2]\n getitem_595 = _foreach_mul_1[3]\n getitem_596 = _foreach_mul_1[4]\n getitem_597 = _foreach_mul_1[5]\n getitem_598 = _foreach_mul_1[6]\n getitem_599 = _foreach_mul_1[7]\n getitem_600 = _foreach_mul_1[8]\n getitem_601 = _foreach_mul_1[9]\n getitem_602 = _foreach_mul_1[10]\n getitem_603 = _foreach_mul_1[11]\n getitem_604 = _foreach_mul_1[12]\n getitem_605 = _foreach_mul_1[13]\n getitem_606 = _foreach_mul_1[14]\n getitem_607 = _foreach_mul_1[15]\n getitem_608 = _foreach_mul_1[16]\n getitem_609 = _foreach_mul_1[17]\n getitem_610 = _foreach_mul_1[18]\n getitem_611 = _foreach_mul_1[19]\n getitem_612 = _foreach_mul_1[20]\n getitem_613 = _foreach_mul_1[21]\n getitem_614 = _foreach_mul_1[22]\n getitem_615 = _foreach_mul_1[23]\n getitem_616 = _foreach_mul_1[24]\n getitem_617 = _foreach_mul_1[25]\n getitem_618 = _foreach_mul_1[26]\n getitem_619 = _foreach_mul_1[27]\n getitem_620 = _foreach_mul_1[28]\n getitem_621 = _foreach_mul_1[29]\n getitem_622 = _foreach_mul_1[30]\n getitem_623 = _foreach_mul_1[31]\n getitem_624 = _foreach_mul_1[32]\n getitem_625 = _foreach_mul_1[33]\n getitem_626 = _foreach_mul_1[34]\n getitem_627 = _foreach_mul_1[35]\n getitem_628 = _foreach_mul_1[36]\n getitem_629 = _foreach_mul_1[37]\n getitem_630 = _foreach_mul_1[38]\n getitem_631 = _foreach_mul_1[39]\n getitem_632 = _foreach_mul_1[40]\n getitem_633 = _foreach_mul_1[41]\n getitem_634 = _foreach_mul_1[42]\n getitem_635 = _foreach_mul_1[43]\n getitem_636 = _foreach_mul_1[44]\n getitem_637 = _foreach_mul_1[45]\n getitem_638 = _foreach_mul_1[46]\n getitem_639 = _foreach_mul_1[47]\n getitem_640 = _foreach_mul_1[48]\n getitem_641 = _foreach_mul_1[49]\n getitem_642 = _foreach_mul_1[50]\n getitem_643 = _foreach_mul_1[51]\n getitem_644 = _foreach_mul_1[52]\n getitem_645 = _foreach_mul_1[53]\n getitem_646 = _foreach_mul_1[54]\n getitem_647 = _foreach_mul_1[55]\n getitem_648 = _foreach_mul_1[56]\n getitem_649 = _foreach_mul_1[57]\n getitem_650 = _foreach_mul_1[58]\n getitem_651 = _foreach_mul_1[59]\n getitem_652 = _foreach_mul_1[60]\n getitem_653 = _foreach_mul_1[61]\n getitem_654 = _foreach_mul_1[62]\n getitem_655 = _foreach_mul_1[63]\n getitem_656 = _foreach_mul_1[64]\n getitem_657 = _foreach_mul_1[65]\n getitem_658 = _foreach_mul_1[66]\n getitem_659 = _foreach_mul_1[67]\n getitem_660 = _foreach_mul_1[68]\n getitem_661 = _foreach_mul_1[69]\n getitem_662 = _foreach_mul_1[70]\n getitem_663 = _foreach_mul_1[71]\n getitem_664 = _foreach_mul_1[72]\n getitem_665 = _foreach_mul_1[73]\n getitem_666 = _foreach_mul_1[74]\n getitem_667 = _foreach_mul_1[75]\n getitem_668 = _foreach_mul_1[76]\n getitem_669 = _foreach_mul_1[77]\n getitem_670 = _foreach_mul_1[78]\n getitem_671 = _foreach_mul_1[79]\n getitem_672 = _foreach_mul_1[80]\n getitem_673 = _foreach_mul_1[81]\n getitem_674 = _foreach_mul_1[82]\n getitem_675 = _foreach_mul_1[83]\n getitem_676 = _foreach_mul_1[84]\n getitem_677 = _foreach_mul_1[85]\n getitem_678 = _foreach_mul_1[86]\n getitem_679 = _foreach_mul_1[87]\n getitem_680 = _foreach_mul_1[88]\n getitem_681 = _foreach_mul_1[89]\n getitem_682 = _foreach_mul_1[90]\n getitem_683 = _foreach_mul_1[91]\n getitem_684 = 
_foreach_mul_1[92]\n getitem_685 = _foreach_mul_1[93]\n getitem_686 = _foreach_mul_1[94]\n getitem_687 = _foreach_mul_1[95]\n getitem_688 = _foreach_mul_1[96]\n getitem_689 = _foreach_mul_1[97]\n getitem_690 = _foreach_mul_1[98]\n getitem_691 = _foreach_mul_1[99]\n getitem_692 = _foreach_mul_1[100]\n getitem_693 = _foreach_mul_1[101]\n getitem_694 = _foreach_mul_1[102]\n getitem_695 = _foreach_mul_1[103]\n getitem_696 = _foreach_mul_1[104]\n getitem_697 = _foreach_mul_1[105]\n getitem_698 = _foreach_mul_1[106]\n getitem_699 = _foreach_mul_1[107]\n getitem_700 = _foreach_mul_1[108]\n getitem_701 = _foreach_mul_1[109]\n getitem_702 = _foreach_mul_1[110]\n getitem_703 = _foreach_mul_1[111]\n getitem_704 = _foreach_mul_1[112]\n getitem_705 = _foreach_mul_1[113]\n getitem_706 = _foreach_mul_1[114]\n getitem_707 = _foreach_mul_1[115]\n getitem_708 = _foreach_mul_1[116]\n getitem_709 = _foreach_mul_1[117]\n getitem_710 = _foreach_mul_1[118]\n getitem_711 = _foreach_mul_1[119]\n getitem_712 = _foreach_mul_1[120]\n getitem_713 = _foreach_mul_1[121]\n getitem_714 = _foreach_mul_1[122]\n getitem_715 = _foreach_mul_1[123]\n getitem_716 = _foreach_mul_1[124]\n getitem_717 = _foreach_mul_1[125]\n getitem_718 = _foreach_mul_1[126]\n getitem_719 = _foreach_mul_1[127]\n getitem_720 = _foreach_mul_1[128]\n getitem_721 = _foreach_mul_1[129]\n getitem_722 = _foreach_mul_1[130]\n getitem_723 = _foreach_mul_1[131]\n getitem_724 = _foreach_mul_1[132]\n getitem_725 = _foreach_mul_1[133]\n getitem_726 = _foreach_mul_1[134]\n getitem_727 = _foreach_mul_1[135]\n getitem_728 = _foreach_mul_1[136]\n getitem_729 = _foreach_mul_1[137]\n getitem_730 = _foreach_mul_1[138]\n getitem_731 = _foreach_mul_1[139]\n getitem_732 = _foreach_mul_1[140]\n getitem_733 = _foreach_mul_1[141]\n getitem_734 = _foreach_mul_1[142]\n getitem_735 = _foreach_mul_1[143]\n getitem_736 = _foreach_mul_1[144]\n getitem_737 = _foreach_mul_1[145]\n getitem_738 = _foreach_mul_1[146]\n getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None\n _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], 
[arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None\n getitem_740 = _foreach_mul_2[0]\n getitem_741 = _foreach_mul_2[1]\n getitem_742 = _foreach_mul_2[2]\n getitem_743 = _foreach_mul_2[3]\n getitem_744 = _foreach_mul_2[4]\n getitem_745 = _foreach_mul_2[5]\n getitem_746 = _foreach_mul_2[6]\n getitem_747 = _foreach_mul_2[7]\n getitem_748 = _foreach_mul_2[8]\n getitem_749 = _foreach_mul_2[9]\n getitem_750 = _foreach_mul_2[10]\n getitem_751 = _foreach_mul_2[11]\n getitem_752 = 
_foreach_mul_2[12]\n getitem_753 = _foreach_mul_2[13]\n getitem_754 = _foreach_mul_2[14]\n getitem_755 = _foreach_mul_2[15]\n getitem_756 = _foreach_mul_2[16]\n getitem_757 = _foreach_mul_2[17]\n getitem_758 = _foreach_mul_2[18]\n getitem_759 = _foreach_mul_2[19]\n getitem_760 = _foreach_mul_2[20]\n getitem_761 = _foreach_mul_2[21]\n getitem_762 = _foreach_mul_2[22]\n getitem_763 = _foreach_mul_2[23]\n getitem_764 = _foreach_mul_2[24]\n getitem_765 = _foreach_mul_2[25]\n getitem_766 = _foreach_mul_2[26]\n getitem_767 = _foreach_mul_2[27]\n getitem_768 = _foreach_mul_2[28]\n getitem_769 = _foreach_mul_2[29]\n getitem_770 = _foreach_mul_2[30]\n getitem_771 = _foreach_mul_2[31]\n getitem_772 = _foreach_mul_2[32]\n getitem_773 = _foreach_mul_2[33]\n getitem_774 = _foreach_mul_2[34]\n getitem_775 = _foreach_mul_2[35]\n getitem_776 = _foreach_mul_2[36]\n getitem_777 = _foreach_mul_2[37]\n getitem_778 = _foreach_mul_2[38]\n getitem_779 = _foreach_mul_2[39]\n getitem_780 = _foreach_mul_2[40]\n getitem_781 = _foreach_mul_2[41]\n getitem_782 = _foreach_mul_2[42]\n getitem_783 = _foreach_mul_2[43]\n getitem_784 = _foreach_mul_2[44]\n getitem_785 = _foreach_mul_2[45]\n getitem_786 = _foreach_mul_2[46]\n getitem_787 = _foreach_mul_2[47]\n getitem_788 = _foreach_mul_2[48]\n getitem_789 = _foreach_mul_2[49]\n getitem_790 = _foreach_mul_2[50]\n getitem_791 = _foreach_mul_2[51]\n getitem_792 = _foreach_mul_2[52]\n getitem_793 = _foreach_mul_2[53]\n getitem_794 = _foreach_mul_2[54]\n getitem_795 = _foreach_mul_2[55]\n getitem_796 = _foreach_mul_2[56]\n getitem_797 = _foreach_mul_2[57]\n getitem_798 = _foreach_mul_2[58]\n getitem_799 = _foreach_mul_2[59]\n getitem_800 = _foreach_mul_2[60]\n getitem_801 = _foreach_mul_2[61]\n getitem_802 = _foreach_mul_2[62]\n getitem_803 = _foreach_mul_2[63]\n getitem_804 = _foreach_mul_2[64]\n getitem_805 = _foreach_mul_2[65]\n getitem_806 = _foreach_mul_2[66]\n getitem_807 = _foreach_mul_2[67]\n getitem_808 = _foreach_mul_2[68]\n getitem_809 = _foreach_mul_2[69]\n getitem_810 = _foreach_mul_2[70]\n getitem_811 = _foreach_mul_2[71]\n getitem_812 = _foreach_mul_2[72]\n getitem_813 = _foreach_mul_2[73]\n getitem_814 = _foreach_mul_2[74]\n getitem_815 = _foreach_mul_2[75]\n getitem_816 = _foreach_mul_2[76]\n getitem_817 = _foreach_mul_2[77]\n getitem_818 = _foreach_mul_2[78]\n getitem_819 = _foreach_mul_2[79]\n getitem_820 = _foreach_mul_2[80]\n getitem_821 = _foreach_mul_2[81]\n getitem_822 = _foreach_mul_2[82]\n getitem_823 = _foreach_mul_2[83]\n getitem_824 = _foreach_mul_2[84]\n getitem_825 = _foreach_mul_2[85]\n getitem_826 = _foreach_mul_2[86]\n getitem_827 = _foreach_mul_2[87]\n getitem_828 = _foreach_mul_2[88]\n getitem_829 = _foreach_mul_2[89]\n getitem_830 = _foreach_mul_2[90]\n getitem_831 = _foreach_mul_2[91]\n getitem_832 = _foreach_mul_2[92]\n getitem_833 = _foreach_mul_2[93]\n getitem_834 = _foreach_mul_2[94]\n getitem_835 = _foreach_mul_2[95]\n getitem_836 = _foreach_mul_2[96]\n getitem_837 = _foreach_mul_2[97]\n getitem_838 = _foreach_mul_2[98]\n getitem_839 = _foreach_mul_2[99]\n getitem_840 = _foreach_mul_2[100]\n getitem_841 = _foreach_mul_2[101]\n getitem_842 = _foreach_mul_2[102]\n getitem_843 = _foreach_mul_2[103]\n getitem_844 = _foreach_mul_2[104]\n getitem_845 = _foreach_mul_2[105]\n getitem_846 = _foreach_mul_2[106]\n getitem_847 = _foreach_mul_2[107]\n getitem_848 = _foreach_mul_2[108]\n getitem_849 = _foreach_mul_2[109]\n getitem_850 = _foreach_mul_2[110]\n getitem_851 = _foreach_mul_2[111]\n getitem_852 = _foreach_mul_2[112]\n getitem_853 = 
_foreach_mul_2[113]\n getitem_854 = _foreach_mul_2[114]\n getitem_855 = _foreach_mul_2[115]\n getitem_856 = _foreach_mul_2[116]\n getitem_857 = _foreach_mul_2[117]\n getitem_858 = _foreach_mul_2[118]\n getitem_859 = _foreach_mul_2[119]\n getitem_860 = _foreach_mul_2[120]\n getitem_861 = _foreach_mul_2[121]\n getitem_862 = _foreach_mul_2[122]\n getitem_863 = _foreach_mul_2[123]\n getitem_864 = _foreach_mul_2[124]\n getitem_865 = _foreach_mul_2[125]\n getitem_866 = _foreach_mul_2[126]\n getitem_867 = _foreach_mul_2[127]\n getitem_868 = _foreach_mul_2[128]\n getitem_869 = _foreach_mul_2[129]\n getitem_870 = _foreach_mul_2[130]\n getitem_871 = _foreach_mul_2[131]\n getitem_872 = _foreach_mul_2[132]\n getitem_873 = _foreach_mul_2[133]\n getitem_874 = _foreach_mul_2[134]\n getitem_875 = _foreach_mul_2[135]\n getitem_876 = _foreach_mul_2[136]\n getitem_877 = _foreach_mul_2[137]\n getitem_878 = _foreach_mul_2[138]\n getitem_879 = _foreach_mul_2[139]\n getitem_880 = _foreach_mul_2[140]\n getitem_881 = _foreach_mul_2[141]\n getitem_882 = _foreach_mul_2[142]\n getitem_883 = _foreach_mul_2[143]\n getitem_884 = _foreach_mul_2[144]\n getitem_885 = _foreach_mul_2[145]\n getitem_886 = _foreach_mul_2[146]\n getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None\n _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, 
getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 = getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = 
getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None\n getitem_888 = _foreach_add_2[0]\n getitem_889 = _foreach_add_2[1]\n getitem_890 = _foreach_add_2[2]\n getitem_891 = _foreach_add_2[3]\n getitem_892 = _foreach_add_2[4]\n getitem_893 = _foreach_add_2[5]\n getitem_894 = _foreach_add_2[6]\n getitem_895 = _foreach_add_2[7]\n getitem_896 = _foreach_add_2[8]\n getitem_897 = _foreach_add_2[9]\n getitem_898 = _foreach_add_2[10]\n getitem_899 = _foreach_add_2[11]\n getitem_900 = _foreach_add_2[12]\n getitem_901 = _foreach_add_2[13]\n getitem_902 = _foreach_add_2[14]\n getitem_903 = _foreach_add_2[15]\n getitem_904 = _foreach_add_2[16]\n getitem_905 = _foreach_add_2[17]\n getitem_906 = _foreach_add_2[18]\n getitem_907 = _foreach_add_2[19]\n getitem_908 = _foreach_add_2[20]\n getitem_909 = _foreach_add_2[21]\n getitem_910 = _foreach_add_2[22]\n getitem_911 = _foreach_add_2[23]\n getitem_912 = _foreach_add_2[24]\n getitem_913 = _foreach_add_2[25]\n getitem_914 = _foreach_add_2[26]\n getitem_915 = _foreach_add_2[27]\n getitem_916 = _foreach_add_2[28]\n getitem_917 = _foreach_add_2[29]\n getitem_918 = _foreach_add_2[30]\n getitem_919 = _foreach_add_2[31]\n getitem_920 = _foreach_add_2[32]\n getitem_921 = _foreach_add_2[33]\n getitem_922 = _foreach_add_2[34]\n getitem_923 = _foreach_add_2[35]\n getitem_924 = _foreach_add_2[36]\n 
getitem_925 = _foreach_add_2[37]\n getitem_926 = _foreach_add_2[38]\n getitem_927 = _foreach_add_2[39]\n getitem_928 = _foreach_add_2[40]\n getitem_929 = _foreach_add_2[41]\n getitem_930 = _foreach_add_2[42]\n getitem_931 = _foreach_add_2[43]\n getitem_932 = _foreach_add_2[44]\n getitem_933 = _foreach_add_2[45]\n getitem_934 = _foreach_add_2[46]\n getitem_935 = _foreach_add_2[47]\n getitem_936 = _foreach_add_2[48]\n getitem_937 = _foreach_add_2[49]\n getitem_938 = _foreach_add_2[50]\n getitem_939 = _foreach_add_2[51]\n getitem_940 = _foreach_add_2[52]\n getitem_941 = _foreach_add_2[53]\n getitem_942 = _foreach_add_2[54]\n getitem_943 = _foreach_add_2[55]\n getitem_944 = _foreach_add_2[56]\n getitem_945 = _foreach_add_2[57]\n getitem_946 = _foreach_add_2[58]\n getitem_947 = _foreach_add_2[59]\n getitem_948 = _foreach_add_2[60]\n getitem_949 = _foreach_add_2[61]\n getitem_950 = _foreach_add_2[62]\n getitem_951 = _foreach_add_2[63]\n getitem_952 = _foreach_add_2[64]\n getitem_953 = _foreach_add_2[65]\n getitem_954 = _foreach_add_2[66]\n getitem_955 = _foreach_add_2[67]\n getitem_956 = _foreach_add_2[68]\n getitem_957 = _foreach_add_2[69]\n getitem_958 = _foreach_add_2[70]\n getitem_959 = _foreach_add_2[71]\n getitem_960 = _foreach_add_2[72]\n getitem_961 = _foreach_add_2[73]\n getitem_962 = _foreach_add_2[74]\n getitem_963 = _foreach_add_2[75]\n getitem_964 = _foreach_add_2[76]\n getitem_965 = _foreach_add_2[77]\n getitem_966 = _foreach_add_2[78]\n getitem_967 = _foreach_add_2[79]\n getitem_968 = _foreach_add_2[80]\n getitem_969 = _foreach_add_2[81]\n getitem_970 = _foreach_add_2[82]\n getitem_971 = _foreach_add_2[83]\n getitem_972 = _foreach_add_2[84]\n getitem_973 = _foreach_add_2[85]\n getitem_974 = _foreach_add_2[86]\n getitem_975 = _foreach_add_2[87]\n getitem_976 = _foreach_add_2[88]\n getitem_977 = _foreach_add_2[89]\n getitem_978 = _foreach_add_2[90]\n getitem_979 = _foreach_add_2[91]\n getitem_980 = _foreach_add_2[92]\n getitem_981 = _foreach_add_2[93]\n getitem_982 = _foreach_add_2[94]\n getitem_983 = _foreach_add_2[95]\n getitem_984 = _foreach_add_2[96]\n getitem_985 = _foreach_add_2[97]\n getitem_986 = _foreach_add_2[98]\n getitem_987 = _foreach_add_2[99]\n getitem_988 = _foreach_add_2[100]\n getitem_989 = _foreach_add_2[101]\n getitem_990 = _foreach_add_2[102]\n getitem_991 = _foreach_add_2[103]\n getitem_992 = _foreach_add_2[104]\n getitem_993 = _foreach_add_2[105]\n getitem_994 = _foreach_add_2[106]\n getitem_995 = _foreach_add_2[107]\n getitem_996 = _foreach_add_2[108]\n getitem_997 = _foreach_add_2[109]\n getitem_998 = _foreach_add_2[110]\n getitem_999 = _foreach_add_2[111]\n getitem_1000 = _foreach_add_2[112]\n getitem_1001 = _foreach_add_2[113]\n getitem_1002 = _foreach_add_2[114]\n getitem_1003 = _foreach_add_2[115]\n getitem_1004 = _foreach_add_2[116]\n getitem_1005 = _foreach_add_2[117]\n getitem_1006 = _foreach_add_2[118]\n getitem_1007 = _foreach_add_2[119]\n getitem_1008 = _foreach_add_2[120]\n getitem_1009 = _foreach_add_2[121]\n getitem_1010 = _foreach_add_2[122]\n getitem_1011 = _foreach_add_2[123]\n getitem_1012 = _foreach_add_2[124]\n getitem_1013 = _foreach_add_2[125]\n getitem_1014 = _foreach_add_2[126]\n getitem_1015 = _foreach_add_2[127]\n getitem_1016 = _foreach_add_2[128]\n getitem_1017 = _foreach_add_2[129]\n getitem_1018 = _foreach_add_2[130]\n getitem_1019 = _foreach_add_2[131]\n getitem_1020 = _foreach_add_2[132]\n getitem_1021 = _foreach_add_2[133]\n getitem_1022 = _foreach_add_2[134]\n getitem_1023 = _foreach_add_2[135]\n getitem_1024 = 
_foreach_add_2[136]\n getitem_1025 = _foreach_add_2[137]\n getitem_1026 = _foreach_add_2[138]\n getitem_1027 = _foreach_add_2[139]\n getitem_1028 = _foreach_add_2[140]\n getitem_1029 = _foreach_add_2[141]\n getitem_1030 = _foreach_add_2[142]\n getitem_1031 = _foreach_add_2[143]\n getitem_1032 = _foreach_add_2[144]\n getitem_1033 = _foreach_add_2[145]\n getitem_1034 = _foreach_add_2[146]\n getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None\n _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1036 = _foreach_pow[0]\n getitem_1037 = _foreach_pow[1]\n getitem_1038 = _foreach_pow[2]\n getitem_1039 = _foreach_pow[3]\n getitem_1040 = _foreach_pow[4]\n getitem_1041 = _foreach_pow[5]\n getitem_1042 = _foreach_pow[6]\n getitem_1043 = _foreach_pow[7]\n getitem_1044 = _foreach_pow[8]\n getitem_1045 = _foreach_pow[9]\n getitem_1046 = _foreach_pow[10]\n getitem_1047 = _foreach_pow[11]\n getitem_1048 = _foreach_pow[12]\n getitem_1049 = _foreach_pow[13]\n getitem_1050 = _foreach_pow[14]\n getitem_1051 = _foreach_pow[15]\n getitem_1052 = _foreach_pow[16]\n getitem_1053 = _foreach_pow[17]\n getitem_1054 = _foreach_pow[18]\n getitem_1055 = _foreach_pow[19]\n getitem_1056 = _foreach_pow[20]\n getitem_1057 = _foreach_pow[21]\n getitem_1058 = _foreach_pow[22]\n getitem_1059 = _foreach_pow[23]\n getitem_1060 = _foreach_pow[24]\n getitem_1061 = _foreach_pow[25]\n getitem_1062 = _foreach_pow[26]\n getitem_1063 = _foreach_pow[27]\n getitem_1064 = _foreach_pow[28]\n getitem_1065 = _foreach_pow[29]\n getitem_1066 = _foreach_pow[30]\n getitem_1067 = _foreach_pow[31]\n getitem_1068 = _foreach_pow[32]\n getitem_1069 = _foreach_pow[33]\n getitem_1070 = _foreach_pow[34]\n getitem_1071 = _foreach_pow[35]\n 
getitem_1072 = _foreach_pow[36]\n getitem_1073 = _foreach_pow[37]\n getitem_1074 = _foreach_pow[38]\n getitem_1075 = _foreach_pow[39]\n getitem_1076 = _foreach_pow[40]\n getitem_1077 = _foreach_pow[41]\n getitem_1078 = _foreach_pow[42]\n getitem_1079 = _foreach_pow[43]\n getitem_1080 = _foreach_pow[44]\n getitem_1081 = _foreach_pow[45]\n getitem_1082 = _foreach_pow[46]\n getitem_1083 = _foreach_pow[47]\n getitem_1084 = _foreach_pow[48]\n getitem_1085 = _foreach_pow[49]\n getitem_1086 = _foreach_pow[50]\n getitem_1087 = _foreach_pow[51]\n getitem_1088 = _foreach_pow[52]\n getitem_1089 = _foreach_pow[53]\n getitem_1090 = _foreach_pow[54]\n getitem_1091 = _foreach_pow[55]\n getitem_1092 = _foreach_pow[56]\n getitem_1093 = _foreach_pow[57]\n getitem_1094 = _foreach_pow[58]\n getitem_1095 = _foreach_pow[59]\n getitem_1096 = _foreach_pow[60]\n getitem_1097 = _foreach_pow[61]\n getitem_1098 = _foreach_pow[62]\n getitem_1099 = _foreach_pow[63]\n getitem_1100 = _foreach_pow[64]\n getitem_1101 = _foreach_pow[65]\n getitem_1102 = _foreach_pow[66]\n getitem_1103 = _foreach_pow[67]\n getitem_1104 = _foreach_pow[68]\n getitem_1105 = _foreach_pow[69]\n getitem_1106 = _foreach_pow[70]\n getitem_1107 = _foreach_pow[71]\n getitem_1108 = _foreach_pow[72]\n getitem_1109 = _foreach_pow[73]\n getitem_1110 = _foreach_pow[74]\n getitem_1111 = _foreach_pow[75]\n getitem_1112 = _foreach_pow[76]\n getitem_1113 = _foreach_pow[77]\n getitem_1114 = _foreach_pow[78]\n getitem_1115 = _foreach_pow[79]\n getitem_1116 = _foreach_pow[80]\n getitem_1117 = _foreach_pow[81]\n getitem_1118 = _foreach_pow[82]\n getitem_1119 = _foreach_pow[83]\n getitem_1120 = _foreach_pow[84]\n getitem_1121 = _foreach_pow[85]\n getitem_1122 = _foreach_pow[86]\n getitem_1123 = _foreach_pow[87]\n getitem_1124 = _foreach_pow[88]\n getitem_1125 = _foreach_pow[89]\n getitem_1126 = _foreach_pow[90]\n getitem_1127 = _foreach_pow[91]\n getitem_1128 = _foreach_pow[92]\n getitem_1129 = _foreach_pow[93]\n getitem_1130 = _foreach_pow[94]\n getitem_1131 = _foreach_pow[95]\n getitem_1132 = _foreach_pow[96]\n getitem_1133 = _foreach_pow[97]\n getitem_1134 = _foreach_pow[98]\n getitem_1135 = _foreach_pow[99]\n getitem_1136 = _foreach_pow[100]\n getitem_1137 = _foreach_pow[101]\n getitem_1138 = _foreach_pow[102]\n getitem_1139 = _foreach_pow[103]\n getitem_1140 = _foreach_pow[104]\n getitem_1141 = _foreach_pow[105]\n getitem_1142 = _foreach_pow[106]\n getitem_1143 = _foreach_pow[107]\n getitem_1144 = _foreach_pow[108]\n getitem_1145 = _foreach_pow[109]\n getitem_1146 = _foreach_pow[110]\n getitem_1147 = _foreach_pow[111]\n getitem_1148 = _foreach_pow[112]\n getitem_1149 = _foreach_pow[113]\n getitem_1150 = _foreach_pow[114]\n getitem_1151 = _foreach_pow[115]\n getitem_1152 = _foreach_pow[116]\n getitem_1153 = _foreach_pow[117]\n getitem_1154 = _foreach_pow[118]\n getitem_1155 = _foreach_pow[119]\n getitem_1156 = _foreach_pow[120]\n getitem_1157 = _foreach_pow[121]\n getitem_1158 = _foreach_pow[122]\n getitem_1159 = _foreach_pow[123]\n getitem_1160 = _foreach_pow[124]\n getitem_1161 = _foreach_pow[125]\n getitem_1162 = _foreach_pow[126]\n getitem_1163 = _foreach_pow[127]\n getitem_1164 = _foreach_pow[128]\n getitem_1165 = _foreach_pow[129]\n getitem_1166 = _foreach_pow[130]\n getitem_1167 = _foreach_pow[131]\n getitem_1168 = _foreach_pow[132]\n getitem_1169 = _foreach_pow[133]\n getitem_1170 = _foreach_pow[134]\n getitem_1171 = _foreach_pow[135]\n getitem_1172 = _foreach_pow[136]\n getitem_1173 = _foreach_pow[137]\n getitem_1174 = _foreach_pow[138]\n getitem_1175 
= _foreach_pow[139]\n getitem_1176 = _foreach_pow[140]\n getitem_1177 = _foreach_pow[141]\n getitem_1178 = _foreach_pow[142]\n getitem_1179 = _foreach_pow[143]\n getitem_1180 = _foreach_pow[144]\n getitem_1181 = _foreach_pow[145]\n getitem_1182 = _foreach_pow[146]\n getitem_1183 = _foreach_pow[147]; _foreach_pow = None\n _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1184 = _foreach_pow_1[0]\n getitem_1185 = _foreach_pow_1[1]\n getitem_1186 = _foreach_pow_1[2]\n getitem_1187 = _foreach_pow_1[3]\n getitem_1188 = _foreach_pow_1[4]\n getitem_1189 = _foreach_pow_1[5]\n getitem_1190 = _foreach_pow_1[6]\n getitem_1191 = _foreach_pow_1[7]\n getitem_1192 = _foreach_pow_1[8]\n getitem_1193 = _foreach_pow_1[9]\n getitem_1194 = _foreach_pow_1[10]\n getitem_1195 = _foreach_pow_1[11]\n getitem_1196 = _foreach_pow_1[12]\n getitem_1197 = _foreach_pow_1[13]\n getitem_1198 = _foreach_pow_1[14]\n getitem_1199 = _foreach_pow_1[15]\n getitem_1200 = _foreach_pow_1[16]\n getitem_1201 = _foreach_pow_1[17]\n getitem_1202 = _foreach_pow_1[18]\n getitem_1203 = _foreach_pow_1[19]\n getitem_1204 = _foreach_pow_1[20]\n getitem_1205 = _foreach_pow_1[21]\n getitem_1206 = _foreach_pow_1[22]\n getitem_1207 = _foreach_pow_1[23]\n getitem_1208 = _foreach_pow_1[24]\n getitem_1209 = _foreach_pow_1[25]\n getitem_1210 = _foreach_pow_1[26]\n getitem_1211 = _foreach_pow_1[27]\n getitem_1212 = _foreach_pow_1[28]\n getitem_1213 = _foreach_pow_1[29]\n getitem_1214 = _foreach_pow_1[30]\n getitem_1215 = _foreach_pow_1[31]\n getitem_1216 = _foreach_pow_1[32]\n getitem_1217 = _foreach_pow_1[33]\n getitem_1218 = _foreach_pow_1[34]\n getitem_1219 = _foreach_pow_1[35]\n getitem_1220 = _foreach_pow_1[36]\n getitem_1221 = 
_foreach_pow_1[37]\n getitem_1222 = _foreach_pow_1[38]\n getitem_1223 = _foreach_pow_1[39]\n getitem_1224 = _foreach_pow_1[40]\n getitem_1225 = _foreach_pow_1[41]\n getitem_1226 = _foreach_pow_1[42]\n getitem_1227 = _foreach_pow_1[43]\n getitem_1228 = _foreach_pow_1[44]\n getitem_1229 = _foreach_pow_1[45]\n getitem_1230 = _foreach_pow_1[46]\n getitem_1231 = _foreach_pow_1[47]\n getitem_1232 = _foreach_pow_1[48]\n getitem_1233 = _foreach_pow_1[49]\n getitem_1234 = _foreach_pow_1[50]\n getitem_1235 = _foreach_pow_1[51]\n getitem_1236 = _foreach_pow_1[52]\n getitem_1237 = _foreach_pow_1[53]\n getitem_1238 = _foreach_pow_1[54]\n getitem_1239 = _foreach_pow_1[55]\n getitem_1240 = _foreach_pow_1[56]\n getitem_1241 = _foreach_pow_1[57]\n getitem_1242 = _foreach_pow_1[58]\n getitem_1243 = _foreach_pow_1[59]\n getitem_1244 = _foreach_pow_1[60]\n getitem_1245 = _foreach_pow_1[61]\n getitem_1246 = _foreach_pow_1[62]\n getitem_1247 = _foreach_pow_1[63]\n getitem_1248 = _foreach_pow_1[64]\n getitem_1249 = _foreach_pow_1[65]\n getitem_1250 = _foreach_pow_1[66]\n getitem_1251 = _foreach_pow_1[67]\n getitem_1252 = _foreach_pow_1[68]\n getitem_1253 = _foreach_pow_1[69]\n getitem_1254 = _foreach_pow_1[70]\n getitem_1255 = _foreach_pow_1[71]\n getitem_1256 = _foreach_pow_1[72]\n getitem_1257 = _foreach_pow_1[73]\n getitem_1258 = _foreach_pow_1[74]\n getitem_1259 = _foreach_pow_1[75]\n getitem_1260 = _foreach_pow_1[76]\n getitem_1261 = _foreach_pow_1[77]\n getitem_1262 = _foreach_pow_1[78]\n getitem_1263 = _foreach_pow_1[79]\n getitem_1264 = _foreach_pow_1[80]\n getitem_1265 = _foreach_pow_1[81]\n getitem_1266 = _foreach_pow_1[82]\n getitem_1267 = _foreach_pow_1[83]\n getitem_1268 = _foreach_pow_1[84]\n getitem_1269 = _foreach_pow_1[85]\n getitem_1270 = _foreach_pow_1[86]\n getitem_1271 = _foreach_pow_1[87]\n getitem_1272 = _foreach_pow_1[88]\n getitem_1273 = _foreach_pow_1[89]\n getitem_1274 = _foreach_pow_1[90]\n getitem_1275 = _foreach_pow_1[91]\n getitem_1276 = _foreach_pow_1[92]\n getitem_1277 = _foreach_pow_1[93]\n getitem_1278 = _foreach_pow_1[94]\n getitem_1279 = _foreach_pow_1[95]\n getitem_1280 = _foreach_pow_1[96]\n getitem_1281 = _foreach_pow_1[97]\n getitem_1282 = _foreach_pow_1[98]\n getitem_1283 = _foreach_pow_1[99]\n getitem_1284 = _foreach_pow_1[100]\n getitem_1285 = _foreach_pow_1[101]\n getitem_1286 = _foreach_pow_1[102]\n getitem_1287 = _foreach_pow_1[103]\n getitem_1288 = _foreach_pow_1[104]\n getitem_1289 = _foreach_pow_1[105]\n getitem_1290 = _foreach_pow_1[106]\n getitem_1291 = _foreach_pow_1[107]\n getitem_1292 = _foreach_pow_1[108]\n getitem_1293 = _foreach_pow_1[109]\n getitem_1294 = _foreach_pow_1[110]\n getitem_1295 = _foreach_pow_1[111]\n getitem_1296 = _foreach_pow_1[112]\n getitem_1297 = _foreach_pow_1[113]\n getitem_1298 = _foreach_pow_1[114]\n getitem_1299 = _foreach_pow_1[115]\n getitem_1300 = _foreach_pow_1[116]\n getitem_1301 = _foreach_pow_1[117]\n getitem_1302 = _foreach_pow_1[118]\n getitem_1303 = _foreach_pow_1[119]\n getitem_1304 = _foreach_pow_1[120]\n getitem_1305 = _foreach_pow_1[121]\n getitem_1306 = _foreach_pow_1[122]\n getitem_1307 = _foreach_pow_1[123]\n getitem_1308 = _foreach_pow_1[124]\n getitem_1309 = _foreach_pow_1[125]\n getitem_1310 = _foreach_pow_1[126]\n getitem_1311 = _foreach_pow_1[127]\n getitem_1312 = _foreach_pow_1[128]\n getitem_1313 = _foreach_pow_1[129]\n getitem_1314 = _foreach_pow_1[130]\n getitem_1315 = _foreach_pow_1[131]\n getitem_1316 = _foreach_pow_1[132]\n getitem_1317 = _foreach_pow_1[133]\n getitem_1318 = _foreach_pow_1[134]\n 
getitem_1319 = _foreach_pow_1[135]\n getitem_1320 = _foreach_pow_1[136]\n getitem_1321 = _foreach_pow_1[137]\n getitem_1322 = _foreach_pow_1[138]\n getitem_1323 = _foreach_pow_1[139]\n getitem_1324 = _foreach_pow_1[140]\n getitem_1325 = _foreach_pow_1[141]\n getitem_1326 = _foreach_pow_1[142]\n getitem_1327 = _foreach_pow_1[143]\n getitem_1328 = _foreach_pow_1[144]\n getitem_1329 = _foreach_pow_1[145]\n getitem_1330 = _foreach_pow_1[146]\n getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None\n _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = 
getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None\n getitem_1332 = _foreach_sub_1[0]\n getitem_1333 = _foreach_sub_1[1]\n getitem_1334 = _foreach_sub_1[2]\n getitem_1335 = _foreach_sub_1[3]\n getitem_1336 = _foreach_sub_1[4]\n getitem_1337 = _foreach_sub_1[5]\n getitem_1338 = _foreach_sub_1[6]\n getitem_1339 = _foreach_sub_1[7]\n getitem_1340 = _foreach_sub_1[8]\n getitem_1341 = _foreach_sub_1[9]\n getitem_1342 = _foreach_sub_1[10]\n getitem_1343 = _foreach_sub_1[11]\n getitem_1344 = _foreach_sub_1[12]\n getitem_1345 = _foreach_sub_1[13]\n getitem_1346 = _foreach_sub_1[14]\n getitem_1347 = _foreach_sub_1[15]\n getitem_1348 = _foreach_sub_1[16]\n getitem_1349 = _foreach_sub_1[17]\n getitem_1350 = _foreach_sub_1[18]\n getitem_1351 = _foreach_sub_1[19]\n getitem_1352 = _foreach_sub_1[20]\n getitem_1353 = _foreach_sub_1[21]\n getitem_1354 = _foreach_sub_1[22]\n getitem_1355 = _foreach_sub_1[23]\n getitem_1356 = _foreach_sub_1[24]\n getitem_1357 = _foreach_sub_1[25]\n getitem_1358 = _foreach_sub_1[26]\n getitem_1359 = _foreach_sub_1[27]\n getitem_1360 = _foreach_sub_1[28]\n getitem_1361 = _foreach_sub_1[29]\n getitem_1362 = _foreach_sub_1[30]\n getitem_1363 = _foreach_sub_1[31]\n getitem_1364 = _foreach_sub_1[32]\n getitem_1365 = _foreach_sub_1[33]\n getitem_1366 = _foreach_sub_1[34]\n getitem_1367 = _foreach_sub_1[35]\n getitem_1368 = _foreach_sub_1[36]\n getitem_1369 = _foreach_sub_1[37]\n getitem_1370 = _foreach_sub_1[38]\n getitem_1371 = _foreach_sub_1[39]\n getitem_1372 = _foreach_sub_1[40]\n getitem_1373 = _foreach_sub_1[41]\n getitem_1374 = _foreach_sub_1[42]\n getitem_1375 = _foreach_sub_1[43]\n getitem_1376 = _foreach_sub_1[44]\n getitem_1377 = _foreach_sub_1[45]\n getitem_1378 = _foreach_sub_1[46]\n getitem_1379 = _foreach_sub_1[47]\n getitem_1380 = _foreach_sub_1[48]\n getitem_1381 = _foreach_sub_1[49]\n getitem_1382 = _foreach_sub_1[50]\n getitem_1383 = _foreach_sub_1[51]\n getitem_1384 = _foreach_sub_1[52]\n getitem_1385 = _foreach_sub_1[53]\n getitem_1386 = _foreach_sub_1[54]\n getitem_1387 = _foreach_sub_1[55]\n getitem_1388 = _foreach_sub_1[56]\n getitem_1389 = _foreach_sub_1[57]\n getitem_1390 = _foreach_sub_1[58]\n getitem_1391 = _foreach_sub_1[59]\n getitem_1392 = _foreach_sub_1[60]\n getitem_1393 = _foreach_sub_1[61]\n getitem_1394 = 
_foreach_sub_1[62]\n getitem_1395 = _foreach_sub_1[63]\n getitem_1396 = _foreach_sub_1[64]\n getitem_1397 = _foreach_sub_1[65]\n getitem_1398 = _foreach_sub_1[66]\n getitem_1399 = _foreach_sub_1[67]\n getitem_1400 = _foreach_sub_1[68]\n getitem_1401 = _foreach_sub_1[69]\n getitem_1402 = _foreach_sub_1[70]\n getitem_1403 = _foreach_sub_1[71]\n getitem_1404 = _foreach_sub_1[72]\n getitem_1405 = _foreach_sub_1[73]\n getitem_1406 = _foreach_sub_1[74]\n getitem_1407 = _foreach_sub_1[75]\n getitem_1408 = _foreach_sub_1[76]\n getitem_1409 = _foreach_sub_1[77]\n getitem_1410 = _foreach_sub_1[78]\n getitem_1411 = _foreach_sub_1[79]\n getitem_1412 = _foreach_sub_1[80]\n getitem_1413 = _foreach_sub_1[81]\n getitem_1414 = _foreach_sub_1[82]\n getitem_1415 = _foreach_sub_1[83]\n getitem_1416 = _foreach_sub_1[84]\n getitem_1417 = _foreach_sub_1[85]\n getitem_1418 = _foreach_sub_1[86]\n getitem_1419 = _foreach_sub_1[87]\n getitem_1420 = _foreach_sub_1[88]\n getitem_1421 = _foreach_sub_1[89]\n getitem_1422 = _foreach_sub_1[90]\n getitem_1423 = _foreach_sub_1[91]\n getitem_1424 = _foreach_sub_1[92]\n getitem_1425 = _foreach_sub_1[93]\n getitem_1426 = _foreach_sub_1[94]\n getitem_1427 = _foreach_sub_1[95]\n getitem_1428 = _foreach_sub_1[96]\n getitem_1429 = _foreach_sub_1[97]\n getitem_1430 = _foreach_sub_1[98]\n getitem_1431 = _foreach_sub_1[99]\n getitem_1432 = _foreach_sub_1[100]\n getitem_1433 = _foreach_sub_1[101]\n getitem_1434 = _foreach_sub_1[102]\n getitem_1435 = _foreach_sub_1[103]\n getitem_1436 = _foreach_sub_1[104]\n getitem_1437 = _foreach_sub_1[105]\n getitem_1438 = _foreach_sub_1[106]\n getitem_1439 = _foreach_sub_1[107]\n getitem_1440 = _foreach_sub_1[108]\n getitem_1441 = _foreach_sub_1[109]\n getitem_1442 = _foreach_sub_1[110]\n getitem_1443 = _foreach_sub_1[111]\n getitem_1444 = _foreach_sub_1[112]\n getitem_1445 = _foreach_sub_1[113]\n getitem_1446 = _foreach_sub_1[114]\n getitem_1447 = _foreach_sub_1[115]\n getitem_1448 = _foreach_sub_1[116]\n getitem_1449 = _foreach_sub_1[117]\n getitem_1450 = _foreach_sub_1[118]\n getitem_1451 = _foreach_sub_1[119]\n getitem_1452 = _foreach_sub_1[120]\n getitem_1453 = _foreach_sub_1[121]\n getitem_1454 = _foreach_sub_1[122]\n getitem_1455 = _foreach_sub_1[123]\n getitem_1456 = _foreach_sub_1[124]\n getitem_1457 = _foreach_sub_1[125]\n getitem_1458 = _foreach_sub_1[126]\n getitem_1459 = _foreach_sub_1[127]\n getitem_1460 = _foreach_sub_1[128]\n getitem_1461 = _foreach_sub_1[129]\n getitem_1462 = _foreach_sub_1[130]\n getitem_1463 = _foreach_sub_1[131]\n getitem_1464 = _foreach_sub_1[132]\n getitem_1465 = _foreach_sub_1[133]\n getitem_1466 = _foreach_sub_1[134]\n getitem_1467 = _foreach_sub_1[135]\n getitem_1468 = _foreach_sub_1[136]\n getitem_1469 = _foreach_sub_1[137]\n getitem_1470 = _foreach_sub_1[138]\n getitem_1471 = _foreach_sub_1[139]\n getitem_1472 = _foreach_sub_1[140]\n getitem_1473 = _foreach_sub_1[141]\n getitem_1474 = _foreach_sub_1[142]\n getitem_1475 = _foreach_sub_1[143]\n getitem_1476 = _foreach_sub_1[144]\n getitem_1477 = _foreach_sub_1[145]\n getitem_1478 = _foreach_sub_1[146]\n getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None\n _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, 
getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 = getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 
= getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None\n getitem_1480 = _foreach_sub_2[0]\n getitem_1481 = _foreach_sub_2[1]\n getitem_1482 = _foreach_sub_2[2]\n getitem_1483 = _foreach_sub_2[3]\n getitem_1484 = _foreach_sub_2[4]\n getitem_1485 = _foreach_sub_2[5]\n getitem_1486 = _foreach_sub_2[6]\n getitem_1487 = _foreach_sub_2[7]\n getitem_1488 = _foreach_sub_2[8]\n getitem_1489 = _foreach_sub_2[9]\n getitem_1490 = _foreach_sub_2[10]\n getitem_1491 = _foreach_sub_2[11]\n getitem_1492 = _foreach_sub_2[12]\n getitem_1493 = _foreach_sub_2[13]\n getitem_1494 = _foreach_sub_2[14]\n getitem_1495 = _foreach_sub_2[15]\n getitem_1496 = _foreach_sub_2[16]\n getitem_1497 = _foreach_sub_2[17]\n getitem_1498 = _foreach_sub_2[18]\n getitem_1499 = _foreach_sub_2[19]\n getitem_1500 = _foreach_sub_2[20]\n getitem_1501 = _foreach_sub_2[21]\n getitem_1502 = _foreach_sub_2[22]\n getitem_1503 = _foreach_sub_2[23]\n getitem_1504 = _foreach_sub_2[24]\n getitem_1505 = _foreach_sub_2[25]\n getitem_1506 = _foreach_sub_2[26]\n getitem_1507 = _foreach_sub_2[27]\n getitem_1508 = _foreach_sub_2[28]\n getitem_1509 = _foreach_sub_2[29]\n getitem_1510 = _foreach_sub_2[30]\n getitem_1511 = _foreach_sub_2[31]\n getitem_1512 = _foreach_sub_2[32]\n getitem_1513 = _foreach_sub_2[33]\n getitem_1514 = _foreach_sub_2[34]\n getitem_1515 = _foreach_sub_2[35]\n getitem_1516 = _foreach_sub_2[36]\n getitem_1517 = _foreach_sub_2[37]\n getitem_1518 = _foreach_sub_2[38]\n getitem_1519 = _foreach_sub_2[39]\n getitem_1520 = _foreach_sub_2[40]\n getitem_1521 = _foreach_sub_2[41]\n getitem_1522 = _foreach_sub_2[42]\n getitem_1523 = _foreach_sub_2[43]\n getitem_1524 = _foreach_sub_2[44]\n getitem_1525 = _foreach_sub_2[45]\n getitem_1526 = _foreach_sub_2[46]\n getitem_1527 = _foreach_sub_2[47]\n getitem_1528 = _foreach_sub_2[48]\n getitem_1529 = _foreach_sub_2[49]\n getitem_1530 = _foreach_sub_2[50]\n getitem_1531 = _foreach_sub_2[51]\n getitem_1532 = _foreach_sub_2[52]\n getitem_1533 = _foreach_sub_2[53]\n getitem_1534 = _foreach_sub_2[54]\n getitem_1535 = _foreach_sub_2[55]\n getitem_1536 = _foreach_sub_2[56]\n getitem_1537 = _foreach_sub_2[57]\n getitem_1538 = _foreach_sub_2[58]\n getitem_1539 = _foreach_sub_2[59]\n getitem_1540 = _foreach_sub_2[60]\n getitem_1541 = _foreach_sub_2[61]\n getitem_1542 = _foreach_sub_2[62]\n getitem_1543 = _foreach_sub_2[63]\n getitem_1544 = _foreach_sub_2[64]\n getitem_1545 = _foreach_sub_2[65]\n getitem_1546 = _foreach_sub_2[66]\n getitem_1547 = _foreach_sub_2[67]\n getitem_1548 = _foreach_sub_2[68]\n getitem_1549 = _foreach_sub_2[69]\n getitem_1550 = _foreach_sub_2[70]\n getitem_1551 = _foreach_sub_2[71]\n getitem_1552 = _foreach_sub_2[72]\n getitem_1553 = _foreach_sub_2[73]\n getitem_1554 = _foreach_sub_2[74]\n getitem_1555 = _foreach_sub_2[75]\n getitem_1556 = _foreach_sub_2[76]\n getitem_1557 = _foreach_sub_2[77]\n getitem_1558 = _foreach_sub_2[78]\n getitem_1559 = _foreach_sub_2[79]\n getitem_1560 = _foreach_sub_2[80]\n getitem_1561 = _foreach_sub_2[81]\n getitem_1562 = _foreach_sub_2[82]\n getitem_1563 = _foreach_sub_2[83]\n getitem_1564 = _foreach_sub_2[84]\n getitem_1565 = _foreach_sub_2[85]\n getitem_1566 = _foreach_sub_2[86]\n getitem_1567 = 
_foreach_sub_2[87]\n getitem_1568 = _foreach_sub_2[88]\n getitem_1569 = _foreach_sub_2[89]\n getitem_1570 = _foreach_sub_2[90]\n getitem_1571 = _foreach_sub_2[91]\n getitem_1572 = _foreach_sub_2[92]\n getitem_1573 = _foreach_sub_2[93]\n getitem_1574 = _foreach_sub_2[94]\n getitem_1575 = _foreach_sub_2[95]\n getitem_1576 = _foreach_sub_2[96]\n getitem_1577 = _foreach_sub_2[97]\n getitem_1578 = _foreach_sub_2[98]\n getitem_1579 = _foreach_sub_2[99]\n getitem_1580 = _foreach_sub_2[100]\n getitem_1581 = _foreach_sub_2[101]\n getitem_1582 = _foreach_sub_2[102]\n getitem_1583 = _foreach_sub_2[103]\n getitem_1584 = _foreach_sub_2[104]\n getitem_1585 = _foreach_sub_2[105]\n getitem_1586 = _foreach_sub_2[106]\n getitem_1587 = _foreach_sub_2[107]\n getitem_1588 = _foreach_sub_2[108]\n getitem_1589 = _foreach_sub_2[109]\n getitem_1590 = _foreach_sub_2[110]\n getitem_1591 = _foreach_sub_2[111]\n getitem_1592 = _foreach_sub_2[112]\n getitem_1593 = _foreach_sub_2[113]\n getitem_1594 = _foreach_sub_2[114]\n getitem_1595 = _foreach_sub_2[115]\n getitem_1596 = _foreach_sub_2[116]\n getitem_1597 = _foreach_sub_2[117]\n getitem_1598 = _foreach_sub_2[118]\n getitem_1599 = _foreach_sub_2[119]\n getitem_1600 = _foreach_sub_2[120]\n getitem_1601 = _foreach_sub_2[121]\n getitem_1602 = _foreach_sub_2[122]\n getitem_1603 = _foreach_sub_2[123]\n getitem_1604 = _foreach_sub_2[124]\n getitem_1605 = _foreach_sub_2[125]\n getitem_1606 = _foreach_sub_2[126]\n getitem_1607 = _foreach_sub_2[127]\n getitem_1608 = _foreach_sub_2[128]\n getitem_1609 = _foreach_sub_2[129]\n getitem_1610 = _foreach_sub_2[130]\n getitem_1611 = _foreach_sub_2[131]\n getitem_1612 = _foreach_sub_2[132]\n getitem_1613 = _foreach_sub_2[133]\n getitem_1614 = _foreach_sub_2[134]\n getitem_1615 = _foreach_sub_2[135]\n getitem_1616 = _foreach_sub_2[136]\n getitem_1617 = _foreach_sub_2[137]\n getitem_1618 = _foreach_sub_2[138]\n getitem_1619 = _foreach_sub_2[139]\n getitem_1620 = _foreach_sub_2[140]\n getitem_1621 = _foreach_sub_2[141]\n getitem_1622 = _foreach_sub_2[142]\n getitem_1623 = _foreach_sub_2[143]\n getitem_1624 = _foreach_sub_2[144]\n getitem_1625 = _foreach_sub_2[145]\n getitem_1626 = _foreach_sub_2[146]\n getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None\n _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, 
getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None\n getitem_1628 = _foreach_neg[0]\n getitem_1629 = _foreach_neg[1]\n getitem_1630 = _foreach_neg[2]\n getitem_1631 = _foreach_neg[3]\n getitem_1632 = _foreach_neg[4]\n getitem_1633 = _foreach_neg[5]\n getitem_1634 = _foreach_neg[6]\n getitem_1635 = _foreach_neg[7]\n getitem_1636 = _foreach_neg[8]\n getitem_1637 = _foreach_neg[9]\n getitem_1638 = _foreach_neg[10]\n getitem_1639 = _foreach_neg[11]\n getitem_1640 = _foreach_neg[12]\n getitem_1641 = _foreach_neg[13]\n getitem_1642 = _foreach_neg[14]\n 
getitem_1643 = _foreach_neg[15]\n getitem_1644 = _foreach_neg[16]\n getitem_1645 = _foreach_neg[17]\n getitem_1646 = _foreach_neg[18]\n getitem_1647 = _foreach_neg[19]\n getitem_1648 = _foreach_neg[20]\n getitem_1649 = _foreach_neg[21]\n getitem_1650 = _foreach_neg[22]\n getitem_1651 = _foreach_neg[23]\n getitem_1652 = _foreach_neg[24]\n getitem_1653 = _foreach_neg[25]\n getitem_1654 = _foreach_neg[26]\n getitem_1655 = _foreach_neg[27]\n getitem_1656 = _foreach_neg[28]\n getitem_1657 = _foreach_neg[29]\n getitem_1658 = _foreach_neg[30]\n getitem_1659 = _foreach_neg[31]\n getitem_1660 = _foreach_neg[32]\n getitem_1661 = _foreach_neg[33]\n getitem_1662 = _foreach_neg[34]\n getitem_1663 = _foreach_neg[35]\n getitem_1664 = _foreach_neg[36]\n getitem_1665 = _foreach_neg[37]\n getitem_1666 = _foreach_neg[38]\n getitem_1667 = _foreach_neg[39]\n getitem_1668 = _foreach_neg[40]\n getitem_1669 = _foreach_neg[41]\n getitem_1670 = _foreach_neg[42]\n getitem_1671 = _foreach_neg[43]\n getitem_1672 = _foreach_neg[44]\n getitem_1673 = _foreach_neg[45]\n getitem_1674 = _foreach_neg[46]\n getitem_1675 = _foreach_neg[47]\n getitem_1676 = _foreach_neg[48]\n getitem_1677 = _foreach_neg[49]\n getitem_1678 = _foreach_neg[50]\n getitem_1679 = _foreach_neg[51]\n getitem_1680 = _foreach_neg[52]\n getitem_1681 = _foreach_neg[53]\n getitem_1682 = _foreach_neg[54]\n getitem_1683 = _foreach_neg[55]\n getitem_1684 = _foreach_neg[56]\n getitem_1685 = _foreach_neg[57]\n getitem_1686 = _foreach_neg[58]\n getitem_1687 = _foreach_neg[59]\n getitem_1688 = _foreach_neg[60]\n getitem_1689 = _foreach_neg[61]\n getitem_1690 = _foreach_neg[62]\n getitem_1691 = _foreach_neg[63]\n getitem_1692 = _foreach_neg[64]\n getitem_1693 = _foreach_neg[65]\n getitem_1694 = _foreach_neg[66]\n getitem_1695 = _foreach_neg[67]\n getitem_1696 = _foreach_neg[68]\n getitem_1697 = _foreach_neg[69]\n getitem_1698 = _foreach_neg[70]\n getitem_1699 = _foreach_neg[71]\n getitem_1700 = _foreach_neg[72]\n getitem_1701 = _foreach_neg[73]\n getitem_1702 = _foreach_neg[74]\n getitem_1703 = _foreach_neg[75]\n getitem_1704 = _foreach_neg[76]\n getitem_1705 = _foreach_neg[77]\n getitem_1706 = _foreach_neg[78]\n getitem_1707 = _foreach_neg[79]\n getitem_1708 = _foreach_neg[80]\n getitem_1709 = _foreach_neg[81]\n getitem_1710 = _foreach_neg[82]\n getitem_1711 = _foreach_neg[83]\n getitem_1712 = _foreach_neg[84]\n getitem_1713 = _foreach_neg[85]\n getitem_1714 = _foreach_neg[86]\n getitem_1715 = _foreach_neg[87]\n getitem_1716 = _foreach_neg[88]\n getitem_1717 = _foreach_neg[89]\n getitem_1718 = _foreach_neg[90]\n getitem_1719 = _foreach_neg[91]\n getitem_1720 = _foreach_neg[92]\n getitem_1721 = _foreach_neg[93]\n getitem_1722 = _foreach_neg[94]\n getitem_1723 = _foreach_neg[95]\n getitem_1724 = _foreach_neg[96]\n getitem_1725 = _foreach_neg[97]\n getitem_1726 = _foreach_neg[98]\n getitem_1727 = _foreach_neg[99]\n getitem_1728 = _foreach_neg[100]\n getitem_1729 = _foreach_neg[101]\n getitem_1730 = _foreach_neg[102]\n getitem_1731 = _foreach_neg[103]\n getitem_1732 = _foreach_neg[104]\n getitem_1733 = _foreach_neg[105]\n getitem_1734 = _foreach_neg[106]\n getitem_1735 = _foreach_neg[107]\n getitem_1736 = _foreach_neg[108]\n getitem_1737 = _foreach_neg[109]\n getitem_1738 = _foreach_neg[110]\n getitem_1739 = _foreach_neg[111]\n getitem_1740 = _foreach_neg[112]\n getitem_1741 = _foreach_neg[113]\n getitem_1742 = _foreach_neg[114]\n getitem_1743 = _foreach_neg[115]\n getitem_1744 = _foreach_neg[116]\n getitem_1745 = _foreach_neg[117]\n getitem_1746 = _foreach_neg[118]\n 
getitem_1747 = _foreach_neg[119]\n getitem_1748 = _foreach_neg[120]\n getitem_1749 = _foreach_neg[121]\n getitem_1750 = _foreach_neg[122]\n getitem_1751 = _foreach_neg[123]\n getitem_1752 = _foreach_neg[124]\n getitem_1753 = _foreach_neg[125]\n getitem_1754 = _foreach_neg[126]\n getitem_1755 = _foreach_neg[127]\n getitem_1756 = _foreach_neg[128]\n getitem_1757 = _foreach_neg[129]\n getitem_1758 = _foreach_neg[130]\n getitem_1759 = _foreach_neg[131]\n getitem_1760 = _foreach_neg[132]\n getitem_1761 = _foreach_neg[133]\n getitem_1762 = _foreach_neg[134]\n getitem_1763 = _foreach_neg[135]\n getitem_1764 = _foreach_neg[136]\n getitem_1765 = _foreach_neg[137]\n getitem_1766 = _foreach_neg[138]\n getitem_1767 = _foreach_neg[139]\n getitem_1768 = _foreach_neg[140]\n getitem_1769 = _foreach_neg[141]\n getitem_1770 = _foreach_neg[142]\n getitem_1771 = _foreach_neg[143]\n getitem_1772 = _foreach_neg[144]\n getitem_1773 = _foreach_neg[145]\n getitem_1774 = _foreach_neg[146]\n getitem_1775 = _foreach_neg[147]; _foreach_neg = None\n _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 
= getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None\n getitem_1776 = _foreach_div[0]\n getitem_1777 = _foreach_div[1]\n getitem_1778 = _foreach_div[2]\n getitem_1779 = _foreach_div[3]\n getitem_1780 = _foreach_div[4]\n getitem_1781 = _foreach_div[5]\n getitem_1782 = _foreach_div[6]\n getitem_1783 = _foreach_div[7]\n getitem_1784 = _foreach_div[8]\n getitem_1785 = _foreach_div[9]\n getitem_1786 = _foreach_div[10]\n getitem_1787 = _foreach_div[11]\n getitem_1788 = _foreach_div[12]\n getitem_1789 = _foreach_div[13]\n getitem_1790 = _foreach_div[14]\n getitem_1791 = _foreach_div[15]\n getitem_1792 = _foreach_div[16]\n getitem_1793 = _foreach_div[17]\n getitem_1794 = _foreach_div[18]\n getitem_1795 = _foreach_div[19]\n getitem_1796 = _foreach_div[20]\n getitem_1797 = _foreach_div[21]\n getitem_1798 = _foreach_div[22]\n getitem_1799 = _foreach_div[23]\n getitem_1800 = _foreach_div[24]\n getitem_1801 = _foreach_div[25]\n getitem_1802 = _foreach_div[26]\n getitem_1803 = _foreach_div[27]\n getitem_1804 = _foreach_div[28]\n getitem_1805 = _foreach_div[29]\n getitem_1806 = _foreach_div[30]\n getitem_1807 = _foreach_div[31]\n getitem_1808 = _foreach_div[32]\n getitem_1809 = _foreach_div[33]\n getitem_1810 = _foreach_div[34]\n getitem_1811 = _foreach_div[35]\n getitem_1812 = _foreach_div[36]\n getitem_1813 = _foreach_div[37]\n getitem_1814 = _foreach_div[38]\n getitem_1815 = _foreach_div[39]\n getitem_1816 = _foreach_div[40]\n getitem_1817 = _foreach_div[41]\n getitem_1818 = _foreach_div[42]\n getitem_1819 = _foreach_div[43]\n getitem_1820 = _foreach_div[44]\n getitem_1821 = _foreach_div[45]\n getitem_1822 = _foreach_div[46]\n getitem_1823 = _foreach_div[47]\n getitem_1824 = _foreach_div[48]\n getitem_1825 = _foreach_div[49]\n getitem_1826 = 
_foreach_div[50]\n getitem_1827 = _foreach_div[51]\n getitem_1828 = _foreach_div[52]\n getitem_1829 = _foreach_div[53]\n getitem_1830 = _foreach_div[54]\n getitem_1831 = _foreach_div[55]\n getitem_1832 = _foreach_div[56]\n getitem_1833 = _foreach_div[57]\n getitem_1834 = _foreach_div[58]\n getitem_1835 = _foreach_div[59]\n getitem_1836 = _foreach_div[60]\n getitem_1837 = _foreach_div[61]\n getitem_1838 = _foreach_div[62]\n getitem_1839 = _foreach_div[63]\n getitem_1840 = _foreach_div[64]\n getitem_1841 = _foreach_div[65]\n getitem_1842 = _foreach_div[66]\n getitem_1843 = _foreach_div[67]\n getitem_1844 = _foreach_div[68]\n getitem_1845 = _foreach_div[69]\n getitem_1846 = _foreach_div[70]\n getitem_1847 = _foreach_div[71]\n getitem_1848 = _foreach_div[72]\n getitem_1849 = _foreach_div[73]\n getitem_1850 = _foreach_div[74]\n getitem_1851 = _foreach_div[75]\n getitem_1852 = _foreach_div[76]\n getitem_1853 = _foreach_div[77]\n getitem_1854 = _foreach_div[78]\n getitem_1855 = _foreach_div[79]\n getitem_1856 = _foreach_div[80]\n getitem_1857 = _foreach_div[81]\n getitem_1858 = _foreach_div[82]\n getitem_1859 = _foreach_div[83]\n getitem_1860 = _foreach_div[84]\n getitem_1861 = _foreach_div[85]\n getitem_1862 = _foreach_div[86]\n getitem_1863 = _foreach_div[87]\n getitem_1864 = _foreach_div[88]\n getitem_1865 = _foreach_div[89]\n getitem_1866 = _foreach_div[90]\n getitem_1867 = _foreach_div[91]\n getitem_1868 = _foreach_div[92]\n getitem_1869 = _foreach_div[93]\n getitem_1870 = _foreach_div[94]\n getitem_1871 = _foreach_div[95]\n getitem_1872 = _foreach_div[96]\n getitem_1873 = _foreach_div[97]\n getitem_1874 = _foreach_div[98]\n getitem_1875 = _foreach_div[99]\n getitem_1876 = _foreach_div[100]\n getitem_1877 = _foreach_div[101]\n getitem_1878 = _foreach_div[102]\n getitem_1879 = _foreach_div[103]\n getitem_1880 = _foreach_div[104]\n getitem_1881 = _foreach_div[105]\n getitem_1882 = _foreach_div[106]\n getitem_1883 = _foreach_div[107]\n getitem_1884 = _foreach_div[108]\n getitem_1885 = _foreach_div[109]\n getitem_1886 = _foreach_div[110]\n getitem_1887 = _foreach_div[111]\n getitem_1888 = _foreach_div[112]\n getitem_1889 = _foreach_div[113]\n getitem_1890 = _foreach_div[114]\n getitem_1891 = _foreach_div[115]\n getitem_1892 = _foreach_div[116]\n getitem_1893 = _foreach_div[117]\n getitem_1894 = _foreach_div[118]\n getitem_1895 = _foreach_div[119]\n getitem_1896 = _foreach_div[120]\n getitem_1897 = _foreach_div[121]\n getitem_1898 = _foreach_div[122]\n getitem_1899 = _foreach_div[123]\n getitem_1900 = _foreach_div[124]\n getitem_1901 = _foreach_div[125]\n getitem_1902 = _foreach_div[126]\n getitem_1903 = _foreach_div[127]\n getitem_1904 = _foreach_div[128]\n getitem_1905 = _foreach_div[129]\n getitem_1906 = _foreach_div[130]\n getitem_1907 = _foreach_div[131]\n getitem_1908 = _foreach_div[132]\n getitem_1909 = _foreach_div[133]\n getitem_1910 = _foreach_div[134]\n getitem_1911 = _foreach_div[135]\n getitem_1912 = _foreach_div[136]\n getitem_1913 = _foreach_div[137]\n getitem_1914 = _foreach_div[138]\n getitem_1915 = _foreach_div[139]\n getitem_1916 = _foreach_div[140]\n getitem_1917 = _foreach_div[141]\n getitem_1918 = _foreach_div[142]\n getitem_1919 = _foreach_div[143]\n getitem_1920 = _foreach_div[144]\n getitem_1921 = _foreach_div[145]\n getitem_1922 = _foreach_div[146]\n getitem_1923 = _foreach_div[147]; _foreach_div = None\n _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, 
getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = 
getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None\n getitem_1924 = _foreach_reciprocal[0]\n getitem_1925 = _foreach_reciprocal[1]\n getitem_1926 = _foreach_reciprocal[2]\n getitem_1927 = _foreach_reciprocal[3]\n getitem_1928 = _foreach_reciprocal[4]\n getitem_1929 = _foreach_reciprocal[5]\n getitem_1930 = _foreach_reciprocal[6]\n getitem_1931 = _foreach_reciprocal[7]\n getitem_1932 = _foreach_reciprocal[8]\n getitem_1933 = _foreach_reciprocal[9]\n getitem_1934 = _foreach_reciprocal[10]\n getitem_1935 = _foreach_reciprocal[11]\n getitem_1936 = _foreach_reciprocal[12]\n getitem_1937 = _foreach_reciprocal[13]\n getitem_1938 = _foreach_reciprocal[14]\n getitem_1939 = _foreach_reciprocal[15]\n getitem_1940 = _foreach_reciprocal[16]\n getitem_1941 = _foreach_reciprocal[17]\n getitem_1942 = _foreach_reciprocal[18]\n getitem_1943 = _foreach_reciprocal[19]\n getitem_1944 = _foreach_reciprocal[20]\n getitem_1945 = _foreach_reciprocal[21]\n getitem_1946 = _foreach_reciprocal[22]\n getitem_1947 = _foreach_reciprocal[23]\n getitem_1948 = _foreach_reciprocal[24]\n getitem_1949 = _foreach_reciprocal[25]\n getitem_1950 = _foreach_reciprocal[26]\n getitem_1951 = _foreach_reciprocal[27]\n getitem_1952 = _foreach_reciprocal[28]\n getitem_1953 = _foreach_reciprocal[29]\n getitem_1954 = _foreach_reciprocal[30]\n getitem_1955 = _foreach_reciprocal[31]\n getitem_1956 = _foreach_reciprocal[32]\n getitem_1957 = _foreach_reciprocal[33]\n getitem_1958 = _foreach_reciprocal[34]\n getitem_1959 = _foreach_reciprocal[35]\n getitem_1960 = _foreach_reciprocal[36]\n getitem_1961 = _foreach_reciprocal[37]\n getitem_1962 = _foreach_reciprocal[38]\n getitem_1963 = _foreach_reciprocal[39]\n getitem_1964 = _foreach_reciprocal[40]\n getitem_1965 = _foreach_reciprocal[41]\n getitem_1966 = _foreach_reciprocal[42]\n getitem_1967 = _foreach_reciprocal[43]\n getitem_1968 = _foreach_reciprocal[44]\n getitem_1969 = _foreach_reciprocal[45]\n getitem_1970 = _foreach_reciprocal[46]\n getitem_1971 = _foreach_reciprocal[47]\n getitem_1972 = _foreach_reciprocal[48]\n getitem_1973 = _foreach_reciprocal[49]\n getitem_1974 = _foreach_reciprocal[50]\n getitem_1975 = _foreach_reciprocal[51]\n getitem_1976 = _foreach_reciprocal[52]\n getitem_1977 = _foreach_reciprocal[53]\n getitem_1978 = _foreach_reciprocal[54]\n getitem_1979 = _foreach_reciprocal[55]\n getitem_1980 = _foreach_reciprocal[56]\n getitem_1981 = _foreach_reciprocal[57]\n getitem_1982 = _foreach_reciprocal[58]\n getitem_1983 = _foreach_reciprocal[59]\n getitem_1984 = _foreach_reciprocal[60]\n getitem_1985 = _foreach_reciprocal[61]\n getitem_1986 = _foreach_reciprocal[62]\n getitem_1987 = _foreach_reciprocal[63]\n getitem_1988 = _foreach_reciprocal[64]\n getitem_1989 = _foreach_reciprocal[65]\n getitem_1990 = _foreach_reciprocal[66]\n getitem_1991 = _foreach_reciprocal[67]\n getitem_1992 = _foreach_reciprocal[68]\n getitem_1993 = _foreach_reciprocal[69]\n getitem_1994 = _foreach_reciprocal[70]\n 
getitem_1995 = _foreach_reciprocal[71]\n getitem_1996 = _foreach_reciprocal[72]\n getitem_1997 = _foreach_reciprocal[73]\n getitem_1998 = _foreach_reciprocal[74]\n getitem_1999 = _foreach_reciprocal[75]\n getitem_2000 = _foreach_reciprocal[76]\n getitem_2001 = _foreach_reciprocal[77]\n getitem_2002 = _foreach_reciprocal[78]\n getitem_2003 = _foreach_reciprocal[79]\n getitem_2004 = _foreach_reciprocal[80]\n getitem_2005 = _foreach_reciprocal[81]\n getitem_2006 = _foreach_reciprocal[82]\n getitem_2007 = _foreach_reciprocal[83]\n getitem_2008 = _foreach_reciprocal[84]\n getitem_2009 = _foreach_reciprocal[85]\n getitem_2010 = _foreach_reciprocal[86]\n getitem_2011 = _foreach_reciprocal[87]\n getitem_2012 = _foreach_reciprocal[88]\n getitem_2013 = _foreach_reciprocal[89]\n getitem_2014 = _foreach_reciprocal[90]\n getitem_2015 = _foreach_reciprocal[91]\n getitem_2016 = _foreach_reciprocal[92]\n getitem_2017 = _foreach_reciprocal[93]\n getitem_2018 = _foreach_reciprocal[94]\n getitem_2019 = _foreach_reciprocal[95]\n getitem_2020 = _foreach_reciprocal[96]\n getitem_2021 = _foreach_reciprocal[97]\n getitem_2022 = _foreach_reciprocal[98]\n getitem_2023 = _foreach_reciprocal[99]\n getitem_2024 = _foreach_reciprocal[100]\n getitem_2025 = _foreach_reciprocal[101]\n getitem_2026 = _foreach_reciprocal[102]\n getitem_2027 = _foreach_reciprocal[103]\n getitem_2028 = _foreach_reciprocal[104]\n getitem_2029 = _foreach_reciprocal[105]\n getitem_2030 = _foreach_reciprocal[106]\n getitem_2031 = _foreach_reciprocal[107]\n getitem_2032 = _foreach_reciprocal[108]\n getitem_2033 = _foreach_reciprocal[109]\n getitem_2034 = _foreach_reciprocal[110]\n getitem_2035 = _foreach_reciprocal[111]\n getitem_2036 = _foreach_reciprocal[112]\n getitem_2037 = _foreach_reciprocal[113]\n getitem_2038 = _foreach_reciprocal[114]\n getitem_2039 = _foreach_reciprocal[115]\n getitem_2040 = _foreach_reciprocal[116]\n getitem_2041 = _foreach_reciprocal[117]\n getitem_2042 = _foreach_reciprocal[118]\n getitem_2043 = _foreach_reciprocal[119]\n getitem_2044 = _foreach_reciprocal[120]\n getitem_2045 = _foreach_reciprocal[121]\n getitem_2046 = _foreach_reciprocal[122]\n getitem_2047 = _foreach_reciprocal[123]\n getitem_2048 = _foreach_reciprocal[124]\n getitem_2049 = _foreach_reciprocal[125]\n getitem_2050 = _foreach_reciprocal[126]\n getitem_2051 = _foreach_reciprocal[127]\n getitem_2052 = _foreach_reciprocal[128]\n getitem_2053 = _foreach_reciprocal[129]\n getitem_2054 = _foreach_reciprocal[130]\n getitem_2055 = _foreach_reciprocal[131]\n getitem_2056 = _foreach_reciprocal[132]\n getitem_2057 = _foreach_reciprocal[133]\n getitem_2058 = _foreach_reciprocal[134]\n getitem_2059 = _foreach_reciprocal[135]\n getitem_2060 = _foreach_reciprocal[136]\n getitem_2061 = _foreach_reciprocal[137]\n getitem_2062 = _foreach_reciprocal[138]\n getitem_2063 = _foreach_reciprocal[139]\n getitem_2064 = _foreach_reciprocal[140]\n getitem_2065 = _foreach_reciprocal[141]\n getitem_2066 = _foreach_reciprocal[142]\n getitem_2067 = _foreach_reciprocal[143]\n getitem_2068 = _foreach_reciprocal[144]\n getitem_2069 = _foreach_reciprocal[145]\n getitem_2070 = _foreach_reciprocal[146]\n getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None\n _foreach_sqrt = torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, 
getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = 
getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None\n getitem_2072 = _foreach_sqrt[0]\n getitem_2073 = _foreach_sqrt[1]\n getitem_2074 = _foreach_sqrt[2]\n getitem_2075 = _foreach_sqrt[3]\n getitem_2076 = _foreach_sqrt[4]\n getitem_2077 = _foreach_sqrt[5]\n getitem_2078 = _foreach_sqrt[6]\n getitem_2079 = _foreach_sqrt[7]\n getitem_2080 = _foreach_sqrt[8]\n getitem_2081 = _foreach_sqrt[9]\n getitem_2082 = _foreach_sqrt[10]\n getitem_2083 = _foreach_sqrt[11]\n getitem_2084 = _foreach_sqrt[12]\n getitem_2085 = _foreach_sqrt[13]\n getitem_2086 = _foreach_sqrt[14]\n getitem_2087 = _foreach_sqrt[15]\n getitem_2088 = _foreach_sqrt[16]\n getitem_2089 = _foreach_sqrt[17]\n getitem_2090 = _foreach_sqrt[18]\n getitem_2091 = _foreach_sqrt[19]\n getitem_2092 = _foreach_sqrt[20]\n getitem_2093 = _foreach_sqrt[21]\n getitem_2094 = _foreach_sqrt[22]\n getitem_2095 = _foreach_sqrt[23]\n getitem_2096 = _foreach_sqrt[24]\n getitem_2097 = _foreach_sqrt[25]\n getitem_2098 = _foreach_sqrt[26]\n getitem_2099 = _foreach_sqrt[27]\n getitem_2100 = _foreach_sqrt[28]\n getitem_2101 = _foreach_sqrt[29]\n getitem_2102 = _foreach_sqrt[30]\n getitem_2103 = _foreach_sqrt[31]\n getitem_2104 = _foreach_sqrt[32]\n getitem_2105 = _foreach_sqrt[33]\n getitem_2106 = _foreach_sqrt[34]\n getitem_2107 = _foreach_sqrt[35]\n getitem_2108 = _foreach_sqrt[36]\n getitem_2109 = _foreach_sqrt[37]\n getitem_2110 = _foreach_sqrt[38]\n getitem_2111 = _foreach_sqrt[39]\n getitem_2112 = _foreach_sqrt[40]\n getitem_2113 = _foreach_sqrt[41]\n getitem_2114 = _foreach_sqrt[42]\n getitem_2115 = _foreach_sqrt[43]\n getitem_2116 = _foreach_sqrt[44]\n getitem_2117 = _foreach_sqrt[45]\n getitem_2118 = _foreach_sqrt[46]\n getitem_2119 = _foreach_sqrt[47]\n getitem_2120 = _foreach_sqrt[48]\n getitem_2121 = _foreach_sqrt[49]\n getitem_2122 = _foreach_sqrt[50]\n getitem_2123 = _foreach_sqrt[51]\n getitem_2124 = _foreach_sqrt[52]\n getitem_2125 = _foreach_sqrt[53]\n getitem_2126 = _foreach_sqrt[54]\n getitem_2127 = _foreach_sqrt[55]\n getitem_2128 = _foreach_sqrt[56]\n getitem_2129 = _foreach_sqrt[57]\n getitem_2130 = _foreach_sqrt[58]\n getitem_2131 = _foreach_sqrt[59]\n getitem_2132 = _foreach_sqrt[60]\n getitem_2133 = _foreach_sqrt[61]\n getitem_2134 = _foreach_sqrt[62]\n getitem_2135 = _foreach_sqrt[63]\n getitem_2136 = _foreach_sqrt[64]\n getitem_2137 = _foreach_sqrt[65]\n getitem_2138 = _foreach_sqrt[66]\n getitem_2139 = _foreach_sqrt[67]\n getitem_2140 = _foreach_sqrt[68]\n getitem_2141 = _foreach_sqrt[69]\n getitem_2142 = _foreach_sqrt[70]\n getitem_2143 = _foreach_sqrt[71]\n getitem_2144 = _foreach_sqrt[72]\n getitem_2145 = _foreach_sqrt[73]\n getitem_2146 = _foreach_sqrt[74]\n getitem_2147 = _foreach_sqrt[75]\n getitem_2148 = _foreach_sqrt[76]\n getitem_2149 = _foreach_sqrt[77]\n getitem_2150 = _foreach_sqrt[78]\n getitem_2151 = _foreach_sqrt[79]\n getitem_2152 = _foreach_sqrt[80]\n getitem_2153 = _foreach_sqrt[81]\n getitem_2154 = _foreach_sqrt[82]\n getitem_2155 = _foreach_sqrt[83]\n getitem_2156 = _foreach_sqrt[84]\n getitem_2157 = _foreach_sqrt[85]\n getitem_2158 = _foreach_sqrt[86]\n getitem_2159 = 
_foreach_sqrt[87]\n getitem_2160 = _foreach_sqrt[88]\n getitem_2161 = _foreach_sqrt[89]\n getitem_2162 = _foreach_sqrt[90]\n getitem_2163 = _foreach_sqrt[91]\n getitem_2164 = _foreach_sqrt[92]\n getitem_2165 = _foreach_sqrt[93]\n getitem_2166 = _foreach_sqrt[94]\n getitem_2167 = _foreach_sqrt[95]\n getitem_2168 = _foreach_sqrt[96]\n getitem_2169 = _foreach_sqrt[97]\n getitem_2170 = _foreach_sqrt[98]\n getitem_2171 = _foreach_sqrt[99]\n getitem_2172 = _foreach_sqrt[100]\n getitem_2173 = _foreach_sqrt[101]\n getitem_2174 = _foreach_sqrt[102]\n getitem_2175 = _foreach_sqrt[103]\n getitem_2176 = _foreach_sqrt[104]\n getitem_2177 = _foreach_sqrt[105]\n getitem_2178 = _foreach_sqrt[106]\n getitem_2179 = _foreach_sqrt[107]\n getitem_2180 = _foreach_sqrt[108]\n getitem_2181 = _foreach_sqrt[109]\n getitem_2182 = _foreach_sqrt[110]\n getitem_2183 = _foreach_sqrt[111]\n getitem_2184 = _foreach_sqrt[112]\n getitem_2185 = _foreach_sqrt[113]\n getitem_2186 = _foreach_sqrt[114]\n getitem_2187 = _foreach_sqrt[115]\n getitem_2188 = _foreach_sqrt[116]\n getitem_2189 = _foreach_sqrt[117]\n getitem_2190 = _foreach_sqrt[118]\n getitem_2191 = _foreach_sqrt[119]\n getitem_2192 = _foreach_sqrt[120]\n getitem_2193 = _foreach_sqrt[121]\n getitem_2194 = _foreach_sqrt[122]\n getitem_2195 = _foreach_sqrt[123]\n getitem_2196 = _foreach_sqrt[124]\n getitem_2197 = _foreach_sqrt[125]\n getitem_2198 = _foreach_sqrt[126]\n getitem_2199 = _foreach_sqrt[127]\n getitem_2200 = _foreach_sqrt[128]\n getitem_2201 = _foreach_sqrt[129]\n getitem_2202 = _foreach_sqrt[130]\n getitem_2203 = _foreach_sqrt[131]\n getitem_2204 = _foreach_sqrt[132]\n getitem_2205 = _foreach_sqrt[133]\n getitem_2206 = _foreach_sqrt[134]\n getitem_2207 = _foreach_sqrt[135]\n getitem_2208 = _foreach_sqrt[136]\n getitem_2209 = _foreach_sqrt[137]\n getitem_2210 = _foreach_sqrt[138]\n getitem_2211 = _foreach_sqrt[139]\n getitem_2212 = _foreach_sqrt[140]\n getitem_2213 = _foreach_sqrt[141]\n getitem_2214 = _foreach_sqrt[142]\n getitem_2215 = _foreach_sqrt[143]\n getitem_2216 = _foreach_sqrt[144]\n getitem_2217 = _foreach_sqrt[145]\n getitem_2218 = _foreach_sqrt[146]\n getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None\n _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, 
getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035])\n getitem_2220 = _foreach_sqrt_1[0]\n getitem_2221 = _foreach_sqrt_1[1]\n getitem_2222 = _foreach_sqrt_1[2]\n getitem_2223 = _foreach_sqrt_1[3]\n getitem_2224 = _foreach_sqrt_1[4]\n getitem_2225 = _foreach_sqrt_1[5]\n getitem_2226 = _foreach_sqrt_1[6]\n getitem_2227 = _foreach_sqrt_1[7]\n getitem_2228 = _foreach_sqrt_1[8]\n getitem_2229 = _foreach_sqrt_1[9]\n getitem_2230 = _foreach_sqrt_1[10]\n getitem_2231 = _foreach_sqrt_1[11]\n getitem_2232 = _foreach_sqrt_1[12]\n getitem_2233 = _foreach_sqrt_1[13]\n getitem_2234 = _foreach_sqrt_1[14]\n getitem_2235 = _foreach_sqrt_1[15]\n getitem_2236 = _foreach_sqrt_1[16]\n getitem_2237 = _foreach_sqrt_1[17]\n getitem_2238 = _foreach_sqrt_1[18]\n getitem_2239 = _foreach_sqrt_1[19]\n getitem_2240 = _foreach_sqrt_1[20]\n getitem_2241 = _foreach_sqrt_1[21]\n getitem_2242 = _foreach_sqrt_1[22]\n getitem_2243 = _foreach_sqrt_1[23]\n getitem_2244 = _foreach_sqrt_1[24]\n getitem_2245 = _foreach_sqrt_1[25]\n getitem_2246 = _foreach_sqrt_1[26]\n getitem_2247 = _foreach_sqrt_1[27]\n getitem_2248 = _foreach_sqrt_1[28]\n getitem_2249 = _foreach_sqrt_1[29]\n getitem_2250 = _foreach_sqrt_1[30]\n getitem_2251 = _foreach_sqrt_1[31]\n getitem_2252 = _foreach_sqrt_1[32]\n getitem_2253 = _foreach_sqrt_1[33]\n getitem_2254 = _foreach_sqrt_1[34]\n getitem_2255 = _foreach_sqrt_1[35]\n getitem_2256 = _foreach_sqrt_1[36]\n getitem_2257 = _foreach_sqrt_1[37]\n getitem_2258 = _foreach_sqrt_1[38]\n getitem_2259 = _foreach_sqrt_1[39]\n getitem_2260 = _foreach_sqrt_1[40]\n getitem_2261 = _foreach_sqrt_1[41]\n getitem_2262 = _foreach_sqrt_1[42]\n getitem_2263 = _foreach_sqrt_1[43]\n getitem_2264 = _foreach_sqrt_1[44]\n getitem_2265 = _foreach_sqrt_1[45]\n getitem_2266 = _foreach_sqrt_1[46]\n getitem_2267 = _foreach_sqrt_1[47]\n getitem_2268 = _foreach_sqrt_1[48]\n getitem_2269 = _foreach_sqrt_1[49]\n getitem_2270 = _foreach_sqrt_1[50]\n getitem_2271 = _foreach_sqrt_1[51]\n getitem_2272 = _foreach_sqrt_1[52]\n getitem_2273 = _foreach_sqrt_1[53]\n getitem_2274 = _foreach_sqrt_1[54]\n getitem_2275 = _foreach_sqrt_1[55]\n getitem_2276 = _foreach_sqrt_1[56]\n getitem_2277 = _foreach_sqrt_1[57]\n getitem_2278 = _foreach_sqrt_1[58]\n getitem_2279 = _foreach_sqrt_1[59]\n getitem_2280 = _foreach_sqrt_1[60]\n getitem_2281 = _foreach_sqrt_1[61]\n getitem_2282 = _foreach_sqrt_1[62]\n getitem_2283 = _foreach_sqrt_1[63]\n getitem_2284 = _foreach_sqrt_1[64]\n getitem_2285 = _foreach_sqrt_1[65]\n getitem_2286 = _foreach_sqrt_1[66]\n getitem_2287 = _foreach_sqrt_1[67]\n getitem_2288 = _foreach_sqrt_1[68]\n getitem_2289 = _foreach_sqrt_1[69]\n getitem_2290 = _foreach_sqrt_1[70]\n getitem_2291 = _foreach_sqrt_1[71]\n getitem_2292 = _foreach_sqrt_1[72]\n getitem_2293 = _foreach_sqrt_1[73]\n getitem_2294 = _foreach_sqrt_1[74]\n getitem_2295 = _foreach_sqrt_1[75]\n getitem_2296 = _foreach_sqrt_1[76]\n getitem_2297 = _foreach_sqrt_1[77]\n getitem_2298 = 
_foreach_sqrt_1[78]\n getitem_2299 = _foreach_sqrt_1[79]\n getitem_2300 = _foreach_sqrt_1[80]\n getitem_2301 = _foreach_sqrt_1[81]\n getitem_2302 = _foreach_sqrt_1[82]\n getitem_2303 = _foreach_sqrt_1[83]\n getitem_2304 = _foreach_sqrt_1[84]\n getitem_2305 = _foreach_sqrt_1[85]\n getitem_2306 = _foreach_sqrt_1[86]\n getitem_2307 = _foreach_sqrt_1[87]\n getitem_2308 = _foreach_sqrt_1[88]\n getitem_2309 = _foreach_sqrt_1[89]\n getitem_2310 = _foreach_sqrt_1[90]\n getitem_2311 = _foreach_sqrt_1[91]\n getitem_2312 = _foreach_sqrt_1[92]\n getitem_2313 = _foreach_sqrt_1[93]\n getitem_2314 = _foreach_sqrt_1[94]\n getitem_2315 = _foreach_sqrt_1[95]\n getitem_2316 = _foreach_sqrt_1[96]\n getitem_2317 = _foreach_sqrt_1[97]\n getitem_2318 = _foreach_sqrt_1[98]\n getitem_2319 = _foreach_sqrt_1[99]\n getitem_2320 = _foreach_sqrt_1[100]\n getitem_2321 = _foreach_sqrt_1[101]\n getitem_2322 = _foreach_sqrt_1[102]\n getitem_2323 = _foreach_sqrt_1[103]\n getitem_2324 = _foreach_sqrt_1[104]\n getitem_2325 = _foreach_sqrt_1[105]\n getitem_2326 = _foreach_sqrt_1[106]\n getitem_2327 = _foreach_sqrt_1[107]\n getitem_2328 = _foreach_sqrt_1[108]\n getitem_2329 = _foreach_sqrt_1[109]\n getitem_2330 = _foreach_sqrt_1[110]\n getitem_2331 = _foreach_sqrt_1[111]\n getitem_2332 = _foreach_sqrt_1[112]\n getitem_2333 = _foreach_sqrt_1[113]\n getitem_2334 = _foreach_sqrt_1[114]\n getitem_2335 = _foreach_sqrt_1[115]\n getitem_2336 = _foreach_sqrt_1[116]\n getitem_2337 = _foreach_sqrt_1[117]\n getitem_2338 = _foreach_sqrt_1[118]\n getitem_2339 = _foreach_sqrt_1[119]\n getitem_2340 = _foreach_sqrt_1[120]\n getitem_2341 = _foreach_sqrt_1[121]\n getitem_2342 = _foreach_sqrt_1[122]\n getitem_2343 = _foreach_sqrt_1[123]\n getitem_2344 = _foreach_sqrt_1[124]\n getitem_2345 = _foreach_sqrt_1[125]\n getitem_2346 = _foreach_sqrt_1[126]\n getitem_2347 = _foreach_sqrt_1[127]\n getitem_2348 = _foreach_sqrt_1[128]\n getitem_2349 = _foreach_sqrt_1[129]\n getitem_2350 = _foreach_sqrt_1[130]\n getitem_2351 = _foreach_sqrt_1[131]\n getitem_2352 = _foreach_sqrt_1[132]\n getitem_2353 = _foreach_sqrt_1[133]\n getitem_2354 = _foreach_sqrt_1[134]\n getitem_2355 = _foreach_sqrt_1[135]\n getitem_2356 = _foreach_sqrt_1[136]\n getitem_2357 = _foreach_sqrt_1[137]\n getitem_2358 = _foreach_sqrt_1[138]\n getitem_2359 = _foreach_sqrt_1[139]\n getitem_2360 = _foreach_sqrt_1[140]\n getitem_2361 = _foreach_sqrt_1[141]\n getitem_2362 = _foreach_sqrt_1[142]\n getitem_2363 = _foreach_sqrt_1[143]\n getitem_2364 = _foreach_sqrt_1[144]\n getitem_2365 = _foreach_sqrt_1[145]\n getitem_2366 = _foreach_sqrt_1[146]\n getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None\n _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, 
getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = 
getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = 
getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None\n getitem_2368 = _foreach_div_1[0]\n getitem_2369 = _foreach_div_1[1]\n getitem_2370 = _foreach_div_1[2]\n getitem_2371 = _foreach_div_1[3]\n getitem_2372 = _foreach_div_1[4]\n getitem_2373 = _foreach_div_1[5]\n getitem_2374 = _foreach_div_1[6]\n getitem_2375 = _foreach_div_1[7]\n getitem_2376 = _foreach_div_1[8]\n getitem_2377 = _foreach_div_1[9]\n getitem_2378 = _foreach_div_1[10]\n getitem_2379 = _foreach_div_1[11]\n getitem_2380 = _foreach_div_1[12]\n getitem_2381 = _foreach_div_1[13]\n getitem_2382 = _foreach_div_1[14]\n getitem_2383 = _foreach_div_1[15]\n getitem_2384 = _foreach_div_1[16]\n getitem_2385 = _foreach_div_1[17]\n getitem_2386 = _foreach_div_1[18]\n getitem_2387 = _foreach_div_1[19]\n getitem_2388 = _foreach_div_1[20]\n getitem_2389 = _foreach_div_1[21]\n getitem_2390 = _foreach_div_1[22]\n getitem_2391 = _foreach_div_1[23]\n getitem_2392 = _foreach_div_1[24]\n getitem_2393 = _foreach_div_1[25]\n getitem_2394 = _foreach_div_1[26]\n getitem_2395 = _foreach_div_1[27]\n getitem_2396 = _foreach_div_1[28]\n getitem_2397 = _foreach_div_1[29]\n getitem_2398 = _foreach_div_1[30]\n getitem_2399 = _foreach_div_1[31]\n getitem_2400 = _foreach_div_1[32]\n getitem_2401 = _foreach_div_1[33]\n getitem_2402 = _foreach_div_1[34]\n getitem_2403 = _foreach_div_1[35]\n getitem_2404 = _foreach_div_1[36]\n getitem_2405 = _foreach_div_1[37]\n getitem_2406 = _foreach_div_1[38]\n getitem_2407 = _foreach_div_1[39]\n getitem_2408 = _foreach_div_1[40]\n getitem_2409 = _foreach_div_1[41]\n getitem_2410 = _foreach_div_1[42]\n getitem_2411 = _foreach_div_1[43]\n getitem_2412 = _foreach_div_1[44]\n getitem_2413 = _foreach_div_1[45]\n getitem_2414 = _foreach_div_1[46]\n getitem_2415 = _foreach_div_1[47]\n getitem_2416 = _foreach_div_1[48]\n getitem_2417 = _foreach_div_1[49]\n getitem_2418 = _foreach_div_1[50]\n getitem_2419 = _foreach_div_1[51]\n getitem_2420 = _foreach_div_1[52]\n getitem_2421 = _foreach_div_1[53]\n getitem_2422 = _foreach_div_1[54]\n getitem_2423 = _foreach_div_1[55]\n getitem_2424 = _foreach_div_1[56]\n getitem_2425 = _foreach_div_1[57]\n getitem_2426 = _foreach_div_1[58]\n getitem_2427 = _foreach_div_1[59]\n getitem_2428 = _foreach_div_1[60]\n getitem_2429 = _foreach_div_1[61]\n getitem_2430 = _foreach_div_1[62]\n getitem_2431 = _foreach_div_1[63]\n getitem_2432 = _foreach_div_1[64]\n getitem_2433 = _foreach_div_1[65]\n getitem_2434 = _foreach_div_1[66]\n getitem_2435 = _foreach_div_1[67]\n getitem_2436 = _foreach_div_1[68]\n getitem_2437 = _foreach_div_1[69]\n getitem_2438 = _foreach_div_1[70]\n getitem_2439 = _foreach_div_1[71]\n getitem_2440 = _foreach_div_1[72]\n getitem_2441 = _foreach_div_1[73]\n getitem_2442 = _foreach_div_1[74]\n getitem_2443 = _foreach_div_1[75]\n getitem_2444 = _foreach_div_1[76]\n getitem_2445 = _foreach_div_1[77]\n getitem_2446 = _foreach_div_1[78]\n getitem_2447 = _foreach_div_1[79]\n getitem_2448 = _foreach_div_1[80]\n 
getitem_2449 = _foreach_div_1[81]\n getitem_2450 = _foreach_div_1[82]\n getitem_2451 = _foreach_div_1[83]\n getitem_2452 = _foreach_div_1[84]\n getitem_2453 = _foreach_div_1[85]\n getitem_2454 = _foreach_div_1[86]\n getitem_2455 = _foreach_div_1[87]\n getitem_2456 = _foreach_div_1[88]\n getitem_2457 = _foreach_div_1[89]\n getitem_2458 = _foreach_div_1[90]\n getitem_2459 = _foreach_div_1[91]\n getitem_2460 = _foreach_div_1[92]\n getitem_2461 = _foreach_div_1[93]\n getitem_2462 = _foreach_div_1[94]\n getitem_2463 = _foreach_div_1[95]\n getitem_2464 = _foreach_div_1[96]\n getitem_2465 = _foreach_div_1[97]\n getitem_2466 = _foreach_div_1[98]\n getitem_2467 = _foreach_div_1[99]\n getitem_2468 = _foreach_div_1[100]\n getitem_2469 = _foreach_div_1[101]\n getitem_2470 = _foreach_div_1[102]\n getitem_2471 = _foreach_div_1[103]\n getitem_2472 = _foreach_div_1[104]\n getitem_2473 = _foreach_div_1[105]\n getitem_2474 = _foreach_div_1[106]\n getitem_2475 = _foreach_div_1[107]\n getitem_2476 = _foreach_div_1[108]\n getitem_2477 = _foreach_div_1[109]\n getitem_2478 = _foreach_div_1[110]\n getitem_2479 = _foreach_div_1[111]\n getitem_2480 = _foreach_div_1[112]\n getitem_2481 = _foreach_div_1[113]\n getitem_2482 = _foreach_div_1[114]\n getitem_2483 = _foreach_div_1[115]\n getitem_2484 = _foreach_div_1[116]\n getitem_2485 = _foreach_div_1[117]\n getitem_2486 = _foreach_div_1[118]\n getitem_2487 = _foreach_div_1[119]\n getitem_2488 = _foreach_div_1[120]\n getitem_2489 = _foreach_div_1[121]\n getitem_2490 = _foreach_div_1[122]\n getitem_2491 = _foreach_div_1[123]\n getitem_2492 = _foreach_div_1[124]\n getitem_2493 = _foreach_div_1[125]\n getitem_2494 = _foreach_div_1[126]\n getitem_2495 = _foreach_div_1[127]\n getitem_2496 = _foreach_div_1[128]\n getitem_2497 = _foreach_div_1[129]\n getitem_2498 = _foreach_div_1[130]\n getitem_2499 = _foreach_div_1[131]\n getitem_2500 = _foreach_div_1[132]\n getitem_2501 = _foreach_div_1[133]\n getitem_2502 = _foreach_div_1[134]\n getitem_2503 = _foreach_div_1[135]\n getitem_2504 = _foreach_div_1[136]\n getitem_2505 = _foreach_div_1[137]\n getitem_2506 = _foreach_div_1[138]\n getitem_2507 = _foreach_div_1[139]\n getitem_2508 = _foreach_div_1[140]\n getitem_2509 = _foreach_div_1[141]\n getitem_2510 = _foreach_div_1[142]\n getitem_2511 = _foreach_div_1[143]\n getitem_2512 = _foreach_div_1[144]\n getitem_2513 = _foreach_div_1[145]\n getitem_2514 = _foreach_div_1[146]\n getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None\n _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, 
getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = getitem_2515 = None\n getitem_2516 = _foreach_add_3[0]\n getitem_2517 = _foreach_add_3[1]\n getitem_2518 = _foreach_add_3[2]\n getitem_2519 = _foreach_add_3[3]\n getitem_2520 = _foreach_add_3[4]\n getitem_2521 = _foreach_add_3[5]\n getitem_2522 = _foreach_add_3[6]\n 
getitem_2523 = _foreach_add_3[7]\n getitem_2524 = _foreach_add_3[8]\n getitem_2525 = _foreach_add_3[9]\n getitem_2526 = _foreach_add_3[10]\n getitem_2527 = _foreach_add_3[11]\n getitem_2528 = _foreach_add_3[12]\n getitem_2529 = _foreach_add_3[13]\n getitem_2530 = _foreach_add_3[14]\n getitem_2531 = _foreach_add_3[15]\n getitem_2532 = _foreach_add_3[16]\n getitem_2533 = _foreach_add_3[17]\n getitem_2534 = _foreach_add_3[18]\n getitem_2535 = _foreach_add_3[19]\n getitem_2536 = _foreach_add_3[20]\n getitem_2537 = _foreach_add_3[21]\n getitem_2538 = _foreach_add_3[22]\n getitem_2539 = _foreach_add_3[23]\n getitem_2540 = _foreach_add_3[24]\n getitem_2541 = _foreach_add_3[25]\n getitem_2542 = _foreach_add_3[26]\n getitem_2543 = _foreach_add_3[27]\n getitem_2544 = _foreach_add_3[28]\n getitem_2545 = _foreach_add_3[29]\n getitem_2546 = _foreach_add_3[30]\n getitem_2547 = _foreach_add_3[31]\n getitem_2548 = _foreach_add_3[32]\n getitem_2549 = _foreach_add_3[33]\n getitem_2550 = _foreach_add_3[34]\n getitem_2551 = _foreach_add_3[35]\n getitem_2552 = _foreach_add_3[36]\n getitem_2553 = _foreach_add_3[37]\n getitem_2554 = _foreach_add_3[38]\n getitem_2555 = _foreach_add_3[39]\n getitem_2556 = _foreach_add_3[40]\n getitem_2557 = _foreach_add_3[41]\n getitem_2558 = _foreach_add_3[42]\n getitem_2559 = _foreach_add_3[43]\n getitem_2560 = _foreach_add_3[44]\n getitem_2561 = _foreach_add_3[45]\n getitem_2562 = _foreach_add_3[46]\n getitem_2563 = _foreach_add_3[47]\n getitem_2564 = _foreach_add_3[48]\n getitem_2565 = _foreach_add_3[49]\n getitem_2566 = _foreach_add_3[50]\n getitem_2567 = _foreach_add_3[51]\n getitem_2568 = _foreach_add_3[52]\n getitem_2569 = _foreach_add_3[53]\n getitem_2570 = _foreach_add_3[54]\n getitem_2571 = _foreach_add_3[55]\n getitem_2572 = _foreach_add_3[56]\n getitem_2573 = _foreach_add_3[57]\n getitem_2574 = _foreach_add_3[58]\n getitem_2575 = _foreach_add_3[59]\n getitem_2576 = _foreach_add_3[60]\n getitem_2577 = _foreach_add_3[61]\n getitem_2578 = _foreach_add_3[62]\n getitem_2579 = _foreach_add_3[63]\n getitem_2580 = _foreach_add_3[64]\n getitem_2581 = _foreach_add_3[65]\n getitem_2582 = _foreach_add_3[66]\n getitem_2583 = _foreach_add_3[67]\n getitem_2584 = _foreach_add_3[68]\n getitem_2585 = _foreach_add_3[69]\n getitem_2586 = _foreach_add_3[70]\n getitem_2587 = _foreach_add_3[71]\n getitem_2588 = _foreach_add_3[72]\n getitem_2589 = _foreach_add_3[73]\n getitem_2590 = _foreach_add_3[74]\n getitem_2591 = _foreach_add_3[75]\n getitem_2592 = _foreach_add_3[76]\n getitem_2593 = _foreach_add_3[77]\n getitem_2594 = _foreach_add_3[78]\n getitem_2595 = _foreach_add_3[79]\n getitem_2596 = _foreach_add_3[80]\n getitem_2597 = _foreach_add_3[81]\n getitem_2598 = _foreach_add_3[82]\n getitem_2599 = _foreach_add_3[83]\n getitem_2600 = _foreach_add_3[84]\n getitem_2601 = _foreach_add_3[85]\n getitem_2602 = _foreach_add_3[86]\n getitem_2603 = _foreach_add_3[87]\n getitem_2604 = _foreach_add_3[88]\n getitem_2605 = _foreach_add_3[89]\n getitem_2606 = _foreach_add_3[90]\n getitem_2607 = _foreach_add_3[91]\n getitem_2608 = _foreach_add_3[92]\n getitem_2609 = _foreach_add_3[93]\n getitem_2610 = _foreach_add_3[94]\n getitem_2611 = _foreach_add_3[95]\n getitem_2612 = _foreach_add_3[96]\n getitem_2613 = _foreach_add_3[97]\n getitem_2614 = _foreach_add_3[98]\n getitem_2615 = _foreach_add_3[99]\n getitem_2616 = _foreach_add_3[100]\n getitem_2617 = _foreach_add_3[101]\n getitem_2618 = _foreach_add_3[102]\n getitem_2619 = _foreach_add_3[103]\n getitem_2620 = _foreach_add_3[104]\n getitem_2621 = 
_foreach_add_3[105]\n getitem_2622 = _foreach_add_3[106]\n getitem_2623 = _foreach_add_3[107]\n getitem_2624 = _foreach_add_3[108]\n getitem_2625 = _foreach_add_3[109]\n getitem_2626 = _foreach_add_3[110]\n getitem_2627 = _foreach_add_3[111]\n getitem_2628 = _foreach_add_3[112]\n getitem_2629 = _foreach_add_3[113]\n getitem_2630 = _foreach_add_3[114]\n getitem_2631 = _foreach_add_3[115]\n getitem_2632 = _foreach_add_3[116]\n getitem_2633 = _foreach_add_3[117]\n getitem_2634 = _foreach_add_3[118]\n getitem_2635 = _foreach_add_3[119]\n getitem_2636 = _foreach_add_3[120]\n getitem_2637 = _foreach_add_3[121]\n getitem_2638 = _foreach_add_3[122]\n getitem_2639 = _foreach_add_3[123]\n getitem_2640 = _foreach_add_3[124]\n getitem_2641 = _foreach_add_3[125]\n getitem_2642 = _foreach_add_3[126]\n getitem_2643 = _foreach_add_3[127]\n getitem_2644 = _foreach_add_3[128]\n getitem_2645 = _foreach_add_3[129]\n getitem_2646 = _foreach_add_3[130]\n getitem_2647 = _foreach_add_3[131]\n getitem_2648 = _foreach_add_3[132]\n getitem_2649 = _foreach_add_3[133]\n getitem_2650 = _foreach_add_3[134]\n getitem_2651 = _foreach_add_3[135]\n getitem_2652 = _foreach_add_3[136]\n getitem_2653 = _foreach_add_3[137]\n getitem_2654 = _foreach_add_3[138]\n getitem_2655 = _foreach_add_3[139]\n getitem_2656 = _foreach_add_3[140]\n getitem_2657 = _foreach_add_3[141]\n getitem_2658 = _foreach_add_3[142]\n getitem_2659 = _foreach_add_3[143]\n getitem_2660 = _foreach_add_3[144]\n getitem_2661 = _foreach_add_3[145]\n getitem_2662 = _foreach_add_3[146]\n getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None\n _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, 
getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = 
getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None\n getitem_2664 = _foreach_div_2[0]\n getitem_2665 = _foreach_div_2[1]\n getitem_2666 = _foreach_div_2[2]\n getitem_2667 = _foreach_div_2[3]\n getitem_2668 = _foreach_div_2[4]\n getitem_2669 = _foreach_div_2[5]\n getitem_2670 = _foreach_div_2[6]\n getitem_2671 = _foreach_div_2[7]\n getitem_2672 = _foreach_div_2[8]\n getitem_2673 = _foreach_div_2[9]\n getitem_2674 = _foreach_div_2[10]\n getitem_2675 = 
_foreach_div_2[11]\n getitem_2676 = _foreach_div_2[12]\n getitem_2677 = _foreach_div_2[13]\n getitem_2678 = _foreach_div_2[14]\n getitem_2679 = _foreach_div_2[15]\n getitem_2680 = _foreach_div_2[16]\n getitem_2681 = _foreach_div_2[17]\n getitem_2682 = _foreach_div_2[18]\n getitem_2683 = _foreach_div_2[19]\n getitem_2684 = _foreach_div_2[20]\n getitem_2685 = _foreach_div_2[21]\n getitem_2686 = _foreach_div_2[22]\n getitem_2687 = _foreach_div_2[23]\n getitem_2688 = _foreach_div_2[24]\n getitem_2689 = _foreach_div_2[25]\n getitem_2690 = _foreach_div_2[26]\n getitem_2691 = _foreach_div_2[27]\n getitem_2692 = _foreach_div_2[28]\n getitem_2693 = _foreach_div_2[29]\n getitem_2694 = _foreach_div_2[30]\n getitem_2695 = _foreach_div_2[31]\n getitem_2696 = _foreach_div_2[32]\n getitem_2697 = _foreach_div_2[33]\n getitem_2698 = _foreach_div_2[34]\n getitem_2699 = _foreach_div_2[35]\n getitem_2700 = _foreach_div_2[36]\n getitem_2701 = _foreach_div_2[37]\n getitem_2702 = _foreach_div_2[38]\n getitem_2703 = _foreach_div_2[39]\n getitem_2704 = _foreach_div_2[40]\n getitem_2705 = _foreach_div_2[41]\n getitem_2706 = _foreach_div_2[42]\n getitem_2707 = _foreach_div_2[43]\n getitem_2708 = _foreach_div_2[44]\n getitem_2709 = _foreach_div_2[45]\n getitem_2710 = _foreach_div_2[46]\n getitem_2711 = _foreach_div_2[47]\n getitem_2712 = _foreach_div_2[48]\n getitem_2713 = _foreach_div_2[49]\n getitem_2714 = _foreach_div_2[50]\n getitem_2715 = _foreach_div_2[51]\n getitem_2716 = _foreach_div_2[52]\n getitem_2717 = _foreach_div_2[53]\n getitem_2718 = _foreach_div_2[54]\n getitem_2719 = _foreach_div_2[55]\n getitem_2720 = _foreach_div_2[56]\n getitem_2721 = _foreach_div_2[57]\n getitem_2722 = _foreach_div_2[58]\n getitem_2723 = _foreach_div_2[59]\n getitem_2724 = _foreach_div_2[60]\n getitem_2725 = _foreach_div_2[61]\n getitem_2726 = _foreach_div_2[62]\n getitem_2727 = _foreach_div_2[63]\n getitem_2728 = _foreach_div_2[64]\n getitem_2729 = _foreach_div_2[65]\n getitem_2730 = _foreach_div_2[66]\n getitem_2731 = _foreach_div_2[67]\n getitem_2732 = _foreach_div_2[68]\n getitem_2733 = _foreach_div_2[69]\n getitem_2734 = _foreach_div_2[70]\n getitem_2735 = _foreach_div_2[71]\n getitem_2736 = _foreach_div_2[72]\n getitem_2737 = _foreach_div_2[73]\n getitem_2738 = _foreach_div_2[74]\n getitem_2739 = _foreach_div_2[75]\n getitem_2740 = _foreach_div_2[76]\n getitem_2741 = _foreach_div_2[77]\n getitem_2742 = _foreach_div_2[78]\n getitem_2743 = _foreach_div_2[79]\n getitem_2744 = _foreach_div_2[80]\n getitem_2745 = _foreach_div_2[81]\n getitem_2746 = _foreach_div_2[82]\n getitem_2747 = _foreach_div_2[83]\n getitem_2748 = _foreach_div_2[84]\n getitem_2749 = _foreach_div_2[85]\n getitem_2750 = _foreach_div_2[86]\n getitem_2751 = _foreach_div_2[87]\n getitem_2752 = _foreach_div_2[88]\n getitem_2753 = _foreach_div_2[89]\n getitem_2754 = _foreach_div_2[90]\n getitem_2755 = _foreach_div_2[91]\n getitem_2756 = _foreach_div_2[92]\n getitem_2757 = _foreach_div_2[93]\n getitem_2758 = _foreach_div_2[94]\n getitem_2759 = _foreach_div_2[95]\n getitem_2760 = _foreach_div_2[96]\n getitem_2761 = _foreach_div_2[97]\n getitem_2762 = _foreach_div_2[98]\n getitem_2763 = _foreach_div_2[99]\n getitem_2764 = _foreach_div_2[100]\n getitem_2765 = _foreach_div_2[101]\n getitem_2766 = _foreach_div_2[102]\n getitem_2767 = _foreach_div_2[103]\n getitem_2768 = _foreach_div_2[104]\n getitem_2769 = _foreach_div_2[105]\n getitem_2770 = _foreach_div_2[106]\n getitem_2771 = _foreach_div_2[107]\n getitem_2772 = _foreach_div_2[108]\n getitem_2773 = 
_foreach_div_2[109]\n getitem_2774 = _foreach_div_2[110]\n getitem_2775 = _foreach_div_2[111]\n getitem_2776 = _foreach_div_2[112]\n getitem_2777 = _foreach_div_2[113]\n getitem_2778 = _foreach_div_2[114]\n getitem_2779 = _foreach_div_2[115]\n getitem_2780 = _foreach_div_2[116]\n getitem_2781 = _foreach_div_2[117]\n getitem_2782 = _foreach_div_2[118]\n getitem_2783 = _foreach_div_2[119]\n getitem_2784 = _foreach_div_2[120]\n getitem_2785 = _foreach_div_2[121]\n getitem_2786 = _foreach_div_2[122]\n getitem_2787 = _foreach_div_2[123]\n getitem_2788 = _foreach_div_2[124]\n getitem_2789 = _foreach_div_2[125]\n getitem_2790 = _foreach_div_2[126]\n getitem_2791 = _foreach_div_2[127]\n getitem_2792 = _foreach_div_2[128]\n getitem_2793 = _foreach_div_2[129]\n getitem_2794 = _foreach_div_2[130]\n getitem_2795 = _foreach_div_2[131]\n getitem_2796 = _foreach_div_2[132]\n getitem_2797 = _foreach_div_2[133]\n getitem_2798 = _foreach_div_2[134]\n getitem_2799 = _foreach_div_2[135]\n getitem_2800 = _foreach_div_2[136]\n getitem_2801 = _foreach_div_2[137]\n getitem_2802 = _foreach_div_2[138]\n getitem_2803 = _foreach_div_2[139]\n getitem_2804 = _foreach_div_2[140]\n getitem_2805 = _foreach_div_2[141]\n getitem_2806 = _foreach_div_2[142]\n getitem_2807 = _foreach_div_2[143]\n getitem_2808 = _foreach_div_2[144]\n getitem_2809 = _foreach_div_2[145]\n getitem_2810 = _foreach_div_2[146]\n getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None\n _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, 
getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 = getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = 
getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None\n getitem_2812 = _foreach_div_3[0]\n getitem_2813 = _foreach_div_3[1]\n getitem_2814 = _foreach_div_3[2]\n getitem_2815 = _foreach_div_3[3]\n getitem_2816 = _foreach_div_3[4]\n getitem_2817 = _foreach_div_3[5]\n getitem_2818 = _foreach_div_3[6]\n getitem_2819 = _foreach_div_3[7]\n getitem_2820 = _foreach_div_3[8]\n getitem_2821 = _foreach_div_3[9]\n getitem_2822 = _foreach_div_3[10]\n getitem_2823 = _foreach_div_3[11]\n getitem_2824 = _foreach_div_3[12]\n getitem_2825 = _foreach_div_3[13]\n getitem_2826 = _foreach_div_3[14]\n getitem_2827 = _foreach_div_3[15]\n getitem_2828 = _foreach_div_3[16]\n getitem_2829 = _foreach_div_3[17]\n getitem_2830 = _foreach_div_3[18]\n getitem_2831 = _foreach_div_3[19]\n getitem_2832 = _foreach_div_3[20]\n getitem_2833 = _foreach_div_3[21]\n getitem_2834 = _foreach_div_3[22]\n getitem_2835 = _foreach_div_3[23]\n getitem_2836 = _foreach_div_3[24]\n getitem_2837 = _foreach_div_3[25]\n getitem_2838 = _foreach_div_3[26]\n getitem_2839 = _foreach_div_3[27]\n getitem_2840 = _foreach_div_3[28]\n getitem_2841 = _foreach_div_3[29]\n getitem_2842 = _foreach_div_3[30]\n getitem_2843 = _foreach_div_3[31]\n getitem_2844 = _foreach_div_3[32]\n getitem_2845 = _foreach_div_3[33]\n getitem_2846 = _foreach_div_3[34]\n getitem_2847 = _foreach_div_3[35]\n getitem_2848 = _foreach_div_3[36]\n getitem_2849 = _foreach_div_3[37]\n getitem_2850 = _foreach_div_3[38]\n getitem_2851 = _foreach_div_3[39]\n getitem_2852 = _foreach_div_3[40]\n getitem_2853 = _foreach_div_3[41]\n getitem_2854 = _foreach_div_3[42]\n getitem_2855 = _foreach_div_3[43]\n getitem_2856 = _foreach_div_3[44]\n getitem_2857 = _foreach_div_3[45]\n getitem_2858 = _foreach_div_3[46]\n getitem_2859 = _foreach_div_3[47]\n getitem_2860 = _foreach_div_3[48]\n getitem_2861 = _foreach_div_3[49]\n getitem_2862 = _foreach_div_3[50]\n getitem_2863 = _foreach_div_3[51]\n getitem_2864 = _foreach_div_3[52]\n getitem_2865 = _foreach_div_3[53]\n getitem_2866 = _foreach_div_3[54]\n getitem_2867 = _foreach_div_3[55]\n getitem_2868 = _foreach_div_3[56]\n getitem_2869 = _foreach_div_3[57]\n getitem_2870 = _foreach_div_3[58]\n getitem_2871 = _foreach_div_3[59]\n getitem_2872 = _foreach_div_3[60]\n getitem_2873 = _foreach_div_3[61]\n getitem_2874 = _foreach_div_3[62]\n getitem_2875 = _foreach_div_3[63]\n getitem_2876 = _foreach_div_3[64]\n getitem_2877 = _foreach_div_3[65]\n getitem_2878 = _foreach_div_3[66]\n getitem_2879 = _foreach_div_3[67]\n getitem_2880 = _foreach_div_3[68]\n getitem_2881 = _foreach_div_3[69]\n getitem_2882 = _foreach_div_3[70]\n getitem_2883 = _foreach_div_3[71]\n getitem_2884 = _foreach_div_3[72]\n getitem_2885 = _foreach_div_3[73]\n getitem_2886 = _foreach_div_3[74]\n getitem_2887 = _foreach_div_3[75]\n getitem_2888 = _foreach_div_3[76]\n getitem_2889 = _foreach_div_3[77]\n getitem_2890 = _foreach_div_3[78]\n getitem_2891 = _foreach_div_3[79]\n getitem_2892 = _foreach_div_3[80]\n getitem_2893 = 
_foreach_div_3[81]\n getitem_2894 = _foreach_div_3[82]\n getitem_2895 = _foreach_div_3[83]\n getitem_2896 = _foreach_div_3[84]\n getitem_2897 = _foreach_div_3[85]\n getitem_2898 = _foreach_div_3[86]\n getitem_2899 = _foreach_div_3[87]\n getitem_2900 = _foreach_div_3[88]\n getitem_2901 = _foreach_div_3[89]\n getitem_2902 = _foreach_div_3[90]\n getitem_2903 = _foreach_div_3[91]\n getitem_2904 = _foreach_div_3[92]\n getitem_2905 = _foreach_div_3[93]\n getitem_2906 = _foreach_div_3[94]\n getitem_2907 = _foreach_div_3[95]\n getitem_2908 = _foreach_div_3[96]\n getitem_2909 = _foreach_div_3[97]\n getitem_2910 = _foreach_div_3[98]\n getitem_2911 = _foreach_div_3[99]\n getitem_2912 = _foreach_div_3[100]\n getitem_2913 = _foreach_div_3[101]\n getitem_2914 = _foreach_div_3[102]\n getitem_2915 = _foreach_div_3[103]\n getitem_2916 = _foreach_div_3[104]\n getitem_2917 = _foreach_div_3[105]\n getitem_2918 = _foreach_div_3[106]\n getitem_2919 = _foreach_div_3[107]\n getitem_2920 = _foreach_div_3[108]\n getitem_2921 = _foreach_div_3[109]\n getitem_2922 = _foreach_div_3[110]\n getitem_2923 = _foreach_div_3[111]\n getitem_2924 = _foreach_div_3[112]\n getitem_2925 = _foreach_div_3[113]\n getitem_2926 = _foreach_div_3[114]\n getitem_2927 = _foreach_div_3[115]\n getitem_2928 = _foreach_div_3[116]\n getitem_2929 = _foreach_div_3[117]\n getitem_2930 = _foreach_div_3[118]\n getitem_2931 = _foreach_div_3[119]\n getitem_2932 = _foreach_div_3[120]\n getitem_2933 = _foreach_div_3[121]\n getitem_2934 = _foreach_div_3[122]\n getitem_2935 = _foreach_div_3[123]\n getitem_2936 = _foreach_div_3[124]\n getitem_2937 = _foreach_div_3[125]\n getitem_2938 = _foreach_div_3[126]\n getitem_2939 = _foreach_div_3[127]\n getitem_2940 = _foreach_div_3[128]\n getitem_2941 = _foreach_div_3[129]\n getitem_2942 = _foreach_div_3[130]\n getitem_2943 = _foreach_div_3[131]\n getitem_2944 = _foreach_div_3[132]\n getitem_2945 = _foreach_div_3[133]\n getitem_2946 = _foreach_div_3[134]\n getitem_2947 = _foreach_div_3[135]\n getitem_2948 = _foreach_div_3[136]\n getitem_2949 = _foreach_div_3[137]\n getitem_2950 = _foreach_div_3[138]\n getitem_2951 = _foreach_div_3[139]\n getitem_2952 = _foreach_div_3[140]\n getitem_2953 = _foreach_div_3[141]\n getitem_2954 = _foreach_div_3[142]\n getitem_2955 = _foreach_div_3[143]\n getitem_2956 = _foreach_div_3[144]\n getitem_2957 = _foreach_div_3[145]\n getitem_2958 = _foreach_div_3[146]\n getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None\n _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, 
arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 
= getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None\n getitem_2960 = _foreach_add_4[0]\n getitem_2961 = _foreach_add_4[1]\n getitem_2962 = _foreach_add_4[2]\n getitem_2963 = _foreach_add_4[3]\n getitem_2964 = _foreach_add_4[4]\n getitem_2965 = _foreach_add_4[5]\n getitem_2966 = _foreach_add_4[6]\n getitem_2967 = _foreach_add_4[7]\n getitem_2968 = _foreach_add_4[8]\n getitem_2969 = _foreach_add_4[9]\n getitem_2970 = _foreach_add_4[10]\n getitem_2971 = _foreach_add_4[11]\n getitem_2972 = _foreach_add_4[12]\n getitem_2973 = _foreach_add_4[13]\n getitem_2974 = _foreach_add_4[14]\n getitem_2975 = _foreach_add_4[15]\n getitem_2976 = _foreach_add_4[16]\n getitem_2977 = _foreach_add_4[17]\n getitem_2978 = _foreach_add_4[18]\n getitem_2979 = _foreach_add_4[19]\n getitem_2980 = _foreach_add_4[20]\n getitem_2981 = _foreach_add_4[21]\n getitem_2982 = _foreach_add_4[22]\n getitem_2983 = _foreach_add_4[23]\n getitem_2984 = _foreach_add_4[24]\n getitem_2985 = _foreach_add_4[25]\n getitem_2986 = _foreach_add_4[26]\n getitem_2987 = _foreach_add_4[27]\n getitem_2988 = _foreach_add_4[28]\n getitem_2989 = _foreach_add_4[29]\n getitem_2990 = _foreach_add_4[30]\n getitem_2991 = _foreach_add_4[31]\n getitem_2992 = _foreach_add_4[32]\n getitem_2993 = _foreach_add_4[33]\n getitem_2994 = _foreach_add_4[34]\n getitem_2995 = _foreach_add_4[35]\n getitem_2996 = _foreach_add_4[36]\n getitem_2997 = _foreach_add_4[37]\n getitem_2998 = _foreach_add_4[38]\n getitem_2999 = _foreach_add_4[39]\n getitem_3000 = _foreach_add_4[40]\n getitem_3001 = _foreach_add_4[41]\n getitem_3002 = _foreach_add_4[42]\n getitem_3003 = _foreach_add_4[43]\n getitem_3004 = _foreach_add_4[44]\n getitem_3005 = _foreach_add_4[45]\n getitem_3006 = _foreach_add_4[46]\n getitem_3007 = _foreach_add_4[47]\n getitem_3008 = _foreach_add_4[48]\n getitem_3009 = _foreach_add_4[49]\n getitem_3010 = _foreach_add_4[50]\n getitem_3011 = _foreach_add_4[51]\n getitem_3012 = _foreach_add_4[52]\n getitem_3013 = _foreach_add_4[53]\n getitem_3014 = _foreach_add_4[54]\n getitem_3015 = _foreach_add_4[55]\n getitem_3016 = _foreach_add_4[56]\n getitem_3017 = _foreach_add_4[57]\n getitem_3018 = _foreach_add_4[58]\n getitem_3019 = _foreach_add_4[59]\n getitem_3020 = _foreach_add_4[60]\n getitem_3021 = _foreach_add_4[61]\n getitem_3022 = _foreach_add_4[62]\n getitem_3023 = _foreach_add_4[63]\n getitem_3024 = _foreach_add_4[64]\n getitem_3025 = _foreach_add_4[65]\n getitem_3026 = _foreach_add_4[66]\n getitem_3027 = _foreach_add_4[67]\n getitem_3028 = 
_foreach_add_4[68]\n getitem_3029 = _foreach_add_4[69]\n getitem_3030 = _foreach_add_4[70]\n getitem_3031 = _foreach_add_4[71]\n getitem_3032 = _foreach_add_4[72]\n getitem_3033 = _foreach_add_4[73]\n getitem_3034 = _foreach_add_4[74]\n getitem_3035 = _foreach_add_4[75]\n getitem_3036 = _foreach_add_4[76]\n getitem_3037 = _foreach_add_4[77]\n getitem_3038 = _foreach_add_4[78]\n getitem_3039 = _foreach_add_4[79]\n getitem_3040 = _foreach_add_4[80]\n getitem_3041 = _foreach_add_4[81]\n getitem_3042 = _foreach_add_4[82]\n getitem_3043 = _foreach_add_4[83]\n getitem_3044 = _foreach_add_4[84]\n getitem_3045 = _foreach_add_4[85]\n getitem_3046 = _foreach_add_4[86]\n getitem_3047 = _foreach_add_4[87]\n getitem_3048 = _foreach_add_4[88]\n getitem_3049 = _foreach_add_4[89]\n getitem_3050 = _foreach_add_4[90]\n getitem_3051 = _foreach_add_4[91]\n getitem_3052 = _foreach_add_4[92]\n getitem_3053 = _foreach_add_4[93]\n getitem_3054 = _foreach_add_4[94]\n getitem_3055 = _foreach_add_4[95]\n getitem_3056 = _foreach_add_4[96]\n getitem_3057 = _foreach_add_4[97]\n getitem_3058 = _foreach_add_4[98]\n getitem_3059 = _foreach_add_4[99]\n getitem_3060 = _foreach_add_4[100]\n getitem_3061 = _foreach_add_4[101]\n getitem_3062 = _foreach_add_4[102]\n getitem_3063 = _foreach_add_4[103]\n getitem_3064 = _foreach_add_4[104]\n getitem_3065 = _foreach_add_4[105]\n getitem_3066 = _foreach_add_4[106]\n getitem_3067 = _foreach_add_4[107]\n getitem_3068 = _foreach_add_4[108]\n getitem_3069 = _foreach_add_4[109]\n getitem_3070 = _foreach_add_4[110]\n getitem_3071 = _foreach_add_4[111]\n getitem_3072 = _foreach_add_4[112]\n getitem_3073 = _foreach_add_4[113]\n getitem_3074 = _foreach_add_4[114]\n getitem_3075 = _foreach_add_4[115]\n getitem_3076 = _foreach_add_4[116]\n getitem_3077 = _foreach_add_4[117]\n getitem_3078 = _foreach_add_4[118]\n getitem_3079 = _foreach_add_4[119]\n getitem_3080 = _foreach_add_4[120]\n getitem_3081 = _foreach_add_4[121]\n getitem_3082 = _foreach_add_4[122]\n getitem_3083 = _foreach_add_4[123]\n getitem_3084 = _foreach_add_4[124]\n getitem_3085 = _foreach_add_4[125]\n getitem_3086 = _foreach_add_4[126]\n getitem_3087 = _foreach_add_4[127]\n getitem_3088 = _foreach_add_4[128]\n getitem_3089 = _foreach_add_4[129]\n getitem_3090 = _foreach_add_4[130]\n getitem_3091 = _foreach_add_4[131]\n getitem_3092 = _foreach_add_4[132]\n getitem_3093 = _foreach_add_4[133]\n getitem_3094 = _foreach_add_4[134]\n getitem_3095 = _foreach_add_4[135]\n getitem_3096 = _foreach_add_4[136]\n getitem_3097 = _foreach_add_4[137]\n getitem_3098 = _foreach_add_4[138]\n getitem_3099 = _foreach_add_4[139]\n getitem_3100 = _foreach_add_4[140]\n getitem_3101 = _foreach_add_4[141]\n getitem_3102 = _foreach_add_4[142]\n getitem_3103 = _foreach_add_4[143]\n getitem_3104 = _foreach_add_4[144]\n getitem_3105 = _foreach_add_4[145]\n getitem_3106 = _foreach_add_4[146]\n getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None\n copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None\n copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None\n copy__2 = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None\n copy__3 = torch.ops.aten.copy_.default(arg3_1, getitem_2963); arg3_1 = getitem_2963 = copy__3 = None\n copy__4 = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None\n copy__5 = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None\n 
copy__6 = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None\n copy__7 = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None\n copy__8 = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None\n copy__9 = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None\n copy__10 = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None\n copy__11 = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None\n copy__12 = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None\n copy__13 = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None\n copy__14 = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None\n copy__15 = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None\n copy__16 = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None\n copy__17 = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None\n copy__18 = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None\n copy__19 = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None\n copy__20 = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None\n copy__21 = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None\n copy__22 = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None\n copy__23 = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None\n copy__24 = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None\n copy__25 = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None\n copy__26 = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None\n copy__27 = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None\n copy__28 = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None\n copy__29 = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None\n copy__30 = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None\n copy__31 = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None\n copy__32 = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None\n copy__33 = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None\n copy__34 = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None\n copy__35 = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None\n copy__36 = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None\n copy__37 = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None\n copy__38 = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None\n copy__39 = 
torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None\n copy__40 = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None\n copy__41 = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None\n copy__42 = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None\n copy__43 = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None\n copy__44 = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None\n copy__45 = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None\n copy__46 = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None\n copy__47 = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None\n copy__48 = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None\n copy__49 = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None\n copy__50 = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None\n copy__51 = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None\n copy__52 = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None\n copy__53 = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None\n copy__54 = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None\n copy__55 = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None\n copy__56 = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None\n copy__57 = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None\n copy__58 = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None\n copy__59 = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None\n copy__60 = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None\n copy__61 = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None\n copy__62 = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None\n copy__63 = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None\n copy__64 = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None\n copy__65 = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None\n copy__66 = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None\n copy__67 = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None\n copy__68 = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None\n copy__69 = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None\n copy__70 = torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None\n copy__71 = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None\n copy__72 = 
torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None\n copy__73 = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None\n copy__74 = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None\n copy__75 = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None\n copy__76 = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None\n copy__77 = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None\n copy__78 = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None\n copy__79 = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None\n copy__80 = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None\n copy__81 = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None\n copy__82 = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None\n copy__83 = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None\n copy__84 = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None\n copy__85 = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None\n copy__86 = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None\n copy__87 = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None\n copy__88 = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None\n copy__89 = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None\n copy__90 = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None\n copy__91 = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None\n copy__92 = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None\n copy__93 = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None\n copy__94 = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None\n copy__95 = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None\n copy__96 = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None\n copy__97 = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None\n copy__98 = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None\n copy__99 = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None\n copy__100 = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None\n copy__101 = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None\n copy__102 = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None\n copy__103 = torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None\n copy__104 = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None\n copy__105 = 
torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None\n copy__106 = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None\n copy__107 = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None\n copy__108 = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None\n copy__109 = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None\n copy__110 = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None\n copy__111 = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None\n copy__112 = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None\n copy__113 = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None\n copy__114 = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None\n copy__115 = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None\n copy__116 = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None\n copy__117 = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None\n copy__118 = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None\n copy__119 = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None\n copy__120 = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None\n copy__121 = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None\n copy__122 = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None\n copy__123 = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None\n copy__124 = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None\n copy__125 = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None\n copy__126 = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None\n copy__127 = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None\n copy__128 = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None\n copy__129 = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None\n copy__130 = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None\n copy__131 = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None\n copy__132 = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None\n copy__133 = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None\n copy__134 = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None\n copy__135 = torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None\n copy__136 = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None\n copy__137 = 
torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None\n copy__138 = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None\n copy__139 = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None\n copy__140 = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None\n copy__141 = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None\n copy__142 = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None\n copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None\n copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None\n copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None\n copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None\n copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None\n copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None\n copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None\n copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None\n copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None\n copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None\n copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None\n copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None\n copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None\n copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None\n copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None\n copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None\n copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None\n copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None\n copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None\n copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None\n copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None\n copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None\n copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None\n copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None\n copy__167 = torch.ops.aten.copy_.default(arg315_1, getitem_461); arg315_1 = getitem_461 = copy__167 = None\n copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None\n copy__169 = torch.ops.aten.copy_.default(arg317_1, 
getitem_463); arg317_1 = getitem_463 = copy__169 = None\n copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None\n copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None\n copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None\n copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None\n copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None\n copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None\n copy__176 = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None\n copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None\n copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None\n copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None\n copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None\n copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None\n copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None\n copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None\n copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None\n copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None\n copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None\n copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None\n copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None\n copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None\n copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None\n copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None\n copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None\n copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None\n copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None\n copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None\n copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None\n copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None\n copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None\n copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None\n copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None\n copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None\n 
copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None\n copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None\n copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None\n copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None\n copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None\n copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None\n copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None\n copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None\n copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None\n copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None\n copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None\n copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None\n copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None\n copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None\n copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None\n copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None\n copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None\n copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None\n copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None\n copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None\n copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None\n copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None\n copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None\n copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None\n copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None\n copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None\n copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None\n copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None\n copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None\n copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None\n copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); arg380_1 = getitem_526 = copy__232 = None\n copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None\n copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); 
arg382_1 = getitem_528 = copy__234 = None\n copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None\n copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None\n copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None\n copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None\n copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None\n copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None\n copy__241 = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None\n copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None\n copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None\n copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None\n copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None\n copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None\n copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None\n copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None\n copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None\n copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None\n copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None\n copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None\n copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None\n copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None\n copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None\n copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None\n copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None\n copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None\n copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None\n copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None\n copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None\n copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None\n copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None\n copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None\n copy__265 = torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None\n copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None\n copy__267 = 
torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None\n copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None\n copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None\n copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None\n copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None\n copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None\n copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None\n copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None\n copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None\n copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None\n copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None\n copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None\n copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None\n copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None\n copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None\n copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None\n copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None\n copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None\n copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None\n copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None\n copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None\n copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None\n copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None\n copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None\n copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None\n copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None\n copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None\n copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None\n copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None\n copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None\n copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = getitem_591 = copy__297 = None\n copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None\n copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = 
getitem_890 = copy__299 = None\n copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None\n copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None\n copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None\n copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None\n copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None\n copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None\n copy__306 = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None\n copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None\n copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None\n copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None\n copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None\n copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None\n copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None\n copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None\n copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None\n copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None\n copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None\n copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None\n copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None\n copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None\n copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None\n copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None\n copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None\n copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None\n copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None\n copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None\n copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None\n copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None\n copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None\n copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None\n copy__330 = torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None\n copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None\n copy__332 = 
torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None\n copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None\n copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None\n copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None\n copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None\n copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None\n copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None\n copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None\n copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None\n copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None\n copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None\n copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None\n copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None\n copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None\n copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None\n copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None\n copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None\n copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None\n copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None\n copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None\n copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None\n copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None\n copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None\n copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None\n copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None\n copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None\n copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None\n copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None\n copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None\n copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None\n copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = getitem_953 = copy__362 = None\n copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None\n copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = 
getitem_955 = copy__364 = None\n copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None\n copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None\n copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None\n copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None\n copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None\n copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None\n copy__371 = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None\n copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None\n copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None\n copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None\n copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None\n copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None\n copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None\n copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None\n copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None\n copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None\n copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None\n copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None\n copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None\n copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None\n copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None\n copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None\n copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None\n copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None\n copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None\n copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None\n copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None\n copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None\n copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None\n copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None\n copy__395 = torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None\n copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None\n copy__397 = 
torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None\n copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None\n copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None\n copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None\n copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None\n copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None\n copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None\n copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None\n copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None\n copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None\n copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None\n copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None\n copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None\n copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None\n copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None\n copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None\n copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None\n copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None\n copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None\n copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None\n copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None\n copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None\n copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None\n copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None\n copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None\n copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None\n copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None\n copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None\n copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None\n copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None\n copy__427 = torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None\n copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None\n copy__429 = 
torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None\n copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None\n copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None\n copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None\n copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None\n copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None\n copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None\n copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None\n copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None\n copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None\n copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None\n copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None\n copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None\n copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None\n copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None\n copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None\n copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None\n copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None\n copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None\n copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None\n copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None\n copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None\n copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None\n copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None\n copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None\n copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None\n copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None\n copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None\n copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None\n copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None\n copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = copy__459 = None\n copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None\n copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = 
None\n copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None\n copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None\n copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None\n copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None\n copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None\n copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None\n copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None\n copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None\n copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None\n copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None\n copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None\n copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None\n copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None\n copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None\n copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None\n copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None\n copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None\n copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None\n copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None\n copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None\n copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None\n copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None\n copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None\n copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None\n copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None\n copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None\n copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None\n copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None\n copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None\n copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None\n copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None\n copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None\n copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None\n copy__495 = 
torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None\n copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None\n copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None\n copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None\n copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None\n copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None\n copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None\n copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None\n copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None\n copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None\n copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None\n copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None\n copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None\n copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None\n copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None\n copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None\n copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None\n copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None\n copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None\n copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None\n copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None\n copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None\n copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None\n copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None\n copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None\n copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None\n copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None\n copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None\n copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None\n copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None\n copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None\n copy__526 = torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None\n copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None\n copy__528 = 
torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None\n copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None\n copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None\n copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None\n copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None\n copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None\n copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None\n copy__535 = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None\n copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None\n copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None\n copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None\n copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None\n copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None\n copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None\n copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None\n copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None\n copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None\n copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None\n copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None\n copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None\n copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None\n copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None\n copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None\n copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None\n copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None\n copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None\n copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None\n copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None\n copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None\n copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None\n copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None\n copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None\n copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None\n 
copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None\n copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None\n copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None\n copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None\n copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None\n copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None\n copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None\n copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None\n copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None\n copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None\n copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None\n copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None\n copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None\n copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None\n copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None\n copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None\n copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None\n copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None\n copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None\n copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None\n copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None\n copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None\n copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None\n copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None\n copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None\n copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None\n copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None\n copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None\n copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None\n copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None\n copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); arg739_1 = getitem_147 = copy__591 = None\n return ()\n \n# To see more debug info, please use `graph_module.print_readable()`", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[0]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[23]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[133]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ucvyfteusmf3hkyqsnlp5ug5dh4kqnbxlr56s7pvkidgpyg2jx5] example_inputs[151]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6gu2nloxzcxrfltpbua7lqp7kkjihxd4w7afynotijqr3v7nhh5] example_inputs[152]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[164]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[165]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[177]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[178]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+
"[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[184]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[188]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), 
stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] 
example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[243]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[261]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[262]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[263]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[264]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[265]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] 
example_inputs[266]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[267]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[268]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[269]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[270]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[271]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[272]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[273]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[274]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[275]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[276]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[277]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[278]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[279]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[280]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[281]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[282]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[283]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[284]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[285]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[286]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[287]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[288]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[289]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[290]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[291]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[292]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[293]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[294]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[295]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[296]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[297]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[298]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[299]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[300]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[301]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[302]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[303]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[304]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[305]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[306]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[307]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[308]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[309]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[310]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[311]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[312]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[313]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[314]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[315]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[316]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[317]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[318]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[319]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[320]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[321]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[322]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[323]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[324]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[325]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[326]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[327]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[328]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[329]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[330]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[331]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[332]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[333]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[334]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[335]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[336]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[337]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[338]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[339]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[340]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[341]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[342]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[343]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[344]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[345]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[346]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[347]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[348]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[349]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[350]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[351]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[352]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[353]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[354]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[355]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[356]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[357]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[358]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[359]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[360]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[361]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[362]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[363]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[364]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[365]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[366]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[367]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[368]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[369]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[370]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[371]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[372]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[373]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[374]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[375]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[376]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[377]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[378]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[379]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[380]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[381]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[382]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[383]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[384]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[385]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[386]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[387]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[388]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[389]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[390]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[391]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[392]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[393]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[394]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[395]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[396]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[397]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[398]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[399]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[400]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[401]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[402]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[403]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[404]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[405]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[406]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[407]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[408]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[409]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[410]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[411]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[412]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[413]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[414]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[415]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[416]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[417]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[418]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[419]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[420]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[421]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[422]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[423]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[424]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[425]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[426]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[427]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[428]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[429]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[430]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[431]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[432]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[433]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[434]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[435]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[436]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[437]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[438]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[439]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[440]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[441]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[442]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[443]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[444]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
+ "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[445]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072,
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[446]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[447]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[448]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[449]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[450]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[451]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[452]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[453]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[454]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[455]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[456]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[457]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[458]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[459]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[460]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[461]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[462]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[463]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[464]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[465]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[466]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[467]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[468]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[469]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[470]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[471]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[472]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[473]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[474]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[475]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[476]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[477]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[478]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[479]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[480]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[481]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[482]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[483]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[484]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[485]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[486]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[487]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[488]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[489]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[490]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[491]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[492]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[493]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[494]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[495]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[496]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[497]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[498]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[499]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[500]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[501]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[502]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[503]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[504]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[505]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[506]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[507]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[508]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[509]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[510]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[511]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[512]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[513]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[514]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[515]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[516]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[517]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[518]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[519]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[520]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[521]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[522]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[523]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[524]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[525]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[526]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[527]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[528]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[529]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[530]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[531]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[532]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[533]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[534]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[535]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[536]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[537]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[538]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[539]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[540]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[541]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[542]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[543]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[544]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[545]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[546]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[547]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[548]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[549]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[550]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[551]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[552]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[553]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[554]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[555]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[556]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[557]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[558]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[559]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[560]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[561]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[562]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[563]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[564]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[565]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[566]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[567]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[568]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[569]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[570]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[571]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[572]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[573]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[574]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[575]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[576]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[577]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[578]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[579]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[580]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[581]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[582]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[583]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[584]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[585]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[586]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[587]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[588]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[589]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[590]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[591]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[592]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[593]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[594]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[595]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[596]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[597]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[598]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[599]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[600]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[601]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[602]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[603]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[604]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[605]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[606]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[607]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[608]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[609]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, 
dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[610]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[611]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[612]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[613]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[614]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[615]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[616]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[617]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[618]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[619]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[620]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[621]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[622]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[623]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[624]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[625]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[626]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[627]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[628]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[629]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[630]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[631]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[632]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[633]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[634]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[635]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[636]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[637]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[638]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[639]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[640]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[641]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[642]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[643]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[644]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[645]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[646]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[647]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[648]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[649]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[650]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[651]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[652]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[653]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[654]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[655]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[656]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[657]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[658]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[659]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[660]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[661]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[662]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[663]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[664]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[665]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[666]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[667]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[668]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[669]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[670]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[671]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[672]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[673]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[674]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[675]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[676]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[677]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[678]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[679]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[680]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[681]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[682]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[683]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[684]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[685]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[686]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[687]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[688]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[689]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[690]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[691]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[692]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[693]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[694]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[695]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[696]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[697]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[698]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[699]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[700]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[701]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[702]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[703]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[704]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[705]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[706]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[707]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[708]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[709]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[710]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[711]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[712]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[713]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[714]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[715]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[716]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[717]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[718]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[719]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[720]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[721]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[722]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[723]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[724]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[725]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[726]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[727]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[728]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[729]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[730]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[731]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[732]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[733]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[734]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[735]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[736]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[737]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[738]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[739]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", + "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", + "[t755of6lmsc7np3j6spka2x5yvicie732qv4wx6uu67rphf6elu] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 
558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739]", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] fx_kwargs[user_visible_outputs]: {}", + "[5gxbt6glr3gti63xp7cch6ofdqfxvag7hsiwxbut4if4xrx6d4g] inputs_to_check[0]: 151", + "[iaarrh4a5kr5bv73kkmrhkca4ysumu2vh65kdzworlx74re2dpo] inputs_to_check[1]: 152", + "[qmhmrekvpai4tc7rejm43nkaq3mgt2cy6w6mzkdg2pdzt4xbwx2] inputs_to_check[2]: 153", + "[qrczcfquzsvwccgmqugssyaib555w6hfdt4shqzvb6brfm3i5el] inputs_to_check[3]: 154", + "[6osxch7or66drdp4fy276u75unoezfbp32qtojzfami7nbek767] inputs_to_check[4]: 155", + "[o3fk4nlt4btpkqljmyxroatirrdhqjr4d264i2tywc6raomfjsw] inputs_to_check[5]: 156", + "[33b7c6n5un2rbt5kdgtbsh6c64len6cjzmios66nja6gia4ojcx] inputs_to_check[6]: 157", + "[ccishwh2vlgdi4q6qdu3en4gukgptvbrqyx5rofx72wz3bicnzy] inputs_to_check[7]: 158", + "[6pn6oydkkil5wvbpu5uvdffsyymbzhxx3t2skamg4wp5vtb3n5k] inputs_to_check[8]: 159", + "[alex6ca6gpzizomfu3wq3xj36jnymygy7hiroowxhaypic6tskv] inputs_to_check[9]: 160", + "[bivqezf4ymabhorni5gd4fe3urab3kvepwenq5gmvosf2pavdd5] inputs_to_check[10]: 161", + "[asq5khhnfffkvmnnmgeoqsye4l64y7kkvyk3snk2tyrgf2qb5mi] inputs_to_check[11]: 162", + "[wz2lj7dbnoawsawyjhobr6oa52jgjntn3o3lmrtkb4bmzfapwhc] inputs_to_check[12]: 163", + "[ngbs2fex7zmmncfaogkkrqimpfgq4wjlkqmf3tcyeswwe3hg3od] inputs_to_check[13]: 164", + "[fssplvrotxdu6guo4defun7h45ns624vjjonloaz7etd7ggxjg4] inputs_to_check[14]: 165", + "[sc6cvy6nxzmjuelrp2whvlhran2f4f6elzbuslbei3dfnaq6qkv] inputs_to_check[15]: 166", + "[p44ecp3xwjohf6mop4nzmt5wxi7uzcii63xm5kryejmipxkfjev] inputs_to_check[16]: 167", + "[xngi5shtw7jcqe2utazf35f7dj5ypzfuduqyypjo6jvv32knjf3] inputs_to_check[17]: 168", + "[niz4gr5x3ya3ukuhf4c7rnhhrswyan3kbzclc6g52u3kvurqmf4] inputs_to_check[18]: 169", + "[bfsiwqbrjfxsnzozsohdqmqjqwa5itj3abqdtdwxf7vxxw2t7iq] inputs_to_check[19]: 170", + "[ditguzdbhtwz6l4chqi3z6cziyi2pct43zhzf5zaragfhqf5akd] inputs_to_check[20]: 171", + "[e3h4w2vfojacabco57r6aj43dgqrhfx545yrdu7qzxkvjyf52t5] inputs_to_check[21]: 172", + "[mfhhdultow3ovihpso6dtsdpntivwjcpfismlaqwsjc6iainhtu] inputs_to_check[22]: 173", + "[kpyorpxsozia3z5wqrbnqnnff6bma7xllpbedgwp4gp76wwwegc] inputs_to_check[23]: 174", + "[lvgyjizlzse4yknfjmvl2uqg4ffygwpuddjgunbxcbjm5g5d5fw] inputs_to_check[24]: 175", + "[qgbusewwprhncdk5ahq4fytqvx7fhrecfetchddqudoq7lkyx3b] inputs_to_check[25]: 176", + "[hmzhjnkpczbvqnshjcfp45u74kngsbufwspp5fa5e4zidpcwwcs] inputs_to_check[26]: 177", + "[oqg53nqin266dynz7o2z27qo3hcxucd5yc3or2656uoqs6quan2] inputs_to_check[27]: 178", + "[k6swhf5ty2zmij2rxhvvpe7jwdxx6llqawhiqixvpvjgc6dhkl6] inputs_to_check[28]: 179", + "[a5s6scmr5zbjnm5a6isxptkuttpfh2uv2g7tza7ci6z366qvhph] inputs_to_check[29]: 180", + 
"[ma4hmi2zhz2v5pv6nw7owyw3z3lswrc5nveayyb3j5iastoc7zl] inputs_to_check[30]: 181", + "[4nwllnnjxh7swihocoe7pvsijtlpzpz6yjwckaahmgpaeimozt7] inputs_to_check[31]: 182", + "[wzkita6x3wixk2x6zsfnpzfqrfaezroxu5xnw7rq7ox7vqgu476] inputs_to_check[32]: 183", + "[xrjqiropfaxwzdwzzhyx6e6c7hjnkppzronjqgpzo6hbpi6yr4g] inputs_to_check[33]: 184", + "[urai455awfpx3bjmqadnlbnbklixi676brusjoruximpnjo76n2] inputs_to_check[34]: 185", + "[eermw7oa3whtf4qdg6cogh7s3xez3tfiest32uplhnwmagpouh6] inputs_to_check[35]: 186", + "[64xsnjmjbzfylxidk5yl6hsvuzswofeqbq3zuv7a6bkdopfeapn] inputs_to_check[36]: 187", + "[6kuevh47g3elelxoo5ac7cmo3r2fh2ygbhs6qyljspkz4y6r7r7] inputs_to_check[37]: 188", + "[ihnc7ngbkirbzwxoyjfhpwki2ewnnpkuzxlegp6fmw6fykdxxj3] inputs_to_check[38]: 189", + "[g3ay2xbjws2ov73c4lkobfibuq4wxwxe75uogzdg7crgtzlagn5] inputs_to_check[39]: 190", + "[6lydlqaer6b3qvlthv7uluevii5gvxgissp4oodsoye43zyvm7f] inputs_to_check[40]: 191", + "[mrm72xpjwecc4eczy6w3ndrca6qgx4ssucludsfllsadesjz7pl] inputs_to_check[41]: 192", + "[7cz46kewyqtcfh7adjmk5i2ljoq5v44ofijq4gmlca7gjy55c6r] inputs_to_check[42]: 193", + "[2ubcxo5fpwyipcg26qkwk7dfk4ci2edpvwxh4fhvgvstq4cf3ke] inputs_to_check[43]: 194", + "[idxzwtb6yotm5u6qhotbqxbuytqogl4lyuzcp4f4rpiekingxrj] inputs_to_check[44]: 195", + "[sxompataxg2kpp6lvmimnzadenerjisuicfkfuwm5exoinhfbsg] inputs_to_check[45]: 196", + "[5pmk7sv27s2bi54s3kwyduqs4ly256qfb2hfrlqfamtcsbur3iv] inputs_to_check[46]: 197", + "[pujj5ix4dbdajeweoew7fe743v6v6wscq7k7pjsqiqopewlh6s4] inputs_to_check[47]: 198", + "[tfk4gvmeljn6oc7yzg7ablm5slfgj5iwldvib27lgy3acro6g77] inputs_to_check[48]: 199", + "[3yqkxangefsazunaw2ibltnnexjixpvosdxyq7kipwrmhng4d66] inputs_to_check[49]: 200", + "[dek4vtwl3t4tioy2oedefor7hqzq7doc3fj4wwdmgrfpt773mvr] inputs_to_check[50]: 201", + "[eyzompn7rqbpbwprodhvszb4fjs3fubclamjylwqsna5imftnou] inputs_to_check[51]: 202", + "[cna2jzzfijl2grhnqpag2peenci7zfourhgcdzidromdrqdyvwm] inputs_to_check[52]: 203", + "[m5mnhtreky3cpmvgnfmbkri3pmhs22tu3kahhkdxv2q67t7rtxk] inputs_to_check[53]: 204", + "[pf3yxn6pwjw3apolzviv77ube4xeqq2n2lgwcduyjvzgiyxg45s] inputs_to_check[54]: 205", + "[zlqnl55vmxcplhlix7khtasmq2gdecqd7jpore57pll2by4u67y] inputs_to_check[55]: 206", + "[6u3htmimfebyyyavsbzctid7bqe3p2vzitaht7rhqdc6l653asq] inputs_to_check[56]: 207", + "[vnxrp3cswdykkkdcda2rykgrj6p7mbsgoq2euf3nhebgbrbdnah] inputs_to_check[57]: 208", + "[xkuyzvn72atoye7xvdr7nkkl6r43muegtld7i23uic3gez4op3w] inputs_to_check[58]: 209", + "[mc4hinl3b4abbhnnd6kjw3mpbdnhcszhce746aznurtp7rckvqe] inputs_to_check[59]: 210", + "[ofm6prxr6hqz2u5z3oywwp5635di572xearfxgeqikq7ir5zyer] inputs_to_check[60]: 211", + "[ib352syzxfoxetnwcwmr562kq6zxh3ba6k6ozr7vmgirzngz445] inputs_to_check[61]: 212", + "[z7vzn4qm5gv2ec4zm5oa552msg56z4an6jyi43vpqrh6rcwtww5] inputs_to_check[62]: 213", + "[6ijaweudkgtayjajytjpkgptbxqygprffq2iv65twukqg2ks24j] inputs_to_check[63]: 214", + "[zr4ja2xbcw5fkklyjkk4dqkrixbqthrmy4gx6wolog3g6twxagb] inputs_to_check[64]: 215", + "[f7rs6g77lmqs6rceoayty7ukws77rxwoi2litnshxyvn2l4qcrs] inputs_to_check[65]: 216", + "[iq4ks6jqbobe4ub7eedwgwhm6u7dujn4pdk6xtzlsutw35gihbh] inputs_to_check[66]: 217", + "[c4tbwksvxlxtiymirqu6houav6ecq5pr3zf4phc5ksuu6ccao5t] inputs_to_check[67]: 218", + "[i3gn36xaneuwkxpjtyp3iiaeudseihrk47h5len33wjeyzt6ez5] inputs_to_check[68]: 219", + "[2hfliifbgstom3wq5au6yesetrbhjmazl3j2z7o4wvztaigyd7p] inputs_to_check[69]: 220", + "[jsavgbo66sdqebklxk3p76jjgnvror75kkpwbfrr2grkuudknxt] inputs_to_check[70]: 221", + "[nl6eg6mm4g66lxwzopu5webjhh4pq52imycgdnbzw5sdafpfzuh] inputs_to_check[71]: 222", + 
"[uulsqibga6yzqtpej2uf4km34ygpbdt4gya2hth26yioguk4rlq] inputs_to_check[72]: 223", + "[ptaz43zzcc5wqnllxp3fe7pvbo75xmcjlabttblccy46hygeozy] inputs_to_check[73]: 224", + "[wlmb2nn3nq5s77bhgmozz6k5xgxru6empq7wgpphhcgprnpnc7o] inputs_to_check[74]: 225", + "[ltksjik3ctg6uqqzzgdjyvza6wpx757hanbg2zoy3qa4k3a4vpx] inputs_to_check[75]: 226", + "[7u7ifib46vahqccsmg654f3uzmjbluuqepiqql3s5ozwtzqz7pp] inputs_to_check[76]: 227", + "[osag4eozwslt5b7yuzth3lgba32eotdkqtr2kw4ey4i43bgvvrs] inputs_to_check[77]: 228", + "[zwdkykxjgt356ykebzld3rzgsfc5zlfk5st4we3ykzkaba3oqex] inputs_to_check[78]: 229", + "[iungcnzcibs3necrx3njdt2ckikflhexkoicbep7tvcvtj5ly5d] inputs_to_check[79]: 230", + "[bwmghobfcwh2lrdjqskkhe6u3vox2fbz53b65rgrzlmndirmzbm] inputs_to_check[80]: 231", + "[qhj52t7zdp3oargrkm4bg6aao35lsfeuhsm5pgevylkqroeb4lc] inputs_to_check[81]: 232", + "[bz4ayxadi54u4x7rrse46x6v7bfpgk5qy4scu6kg76fjuxlpy6u] inputs_to_check[82]: 233", + "[gw4sqgphdvlxdqktwxjso65pxpymqslnhgr6l4eyswl5izdixw6] inputs_to_check[83]: 234", + "[ewdr2tnhafkagyyp46wn5led3h754p4nzttu3w54uhxrqux3vvn] inputs_to_check[84]: 235", + "[4e7a3dp2ygblswjy2t4s5ytwvfgeppo6v5xajwfebrzqvxl5c3z] inputs_to_check[85]: 236", + "[dnrgqiivi7fu47qt3k5ea43mmzonrx62hvlvj7p4zs2whj47y7d] inputs_to_check[86]: 237", + "[pyoye6lh4ebpuzincukqsblo7sz2ok35q5n23bykkflupvr6b3x] inputs_to_check[87]: 238", + "[zzav4ck6zf5ii2aadgk45satnvsrcelh7lwasful7siezs7emg2] inputs_to_check[88]: 239", + "[jqli3sayeay6jxebdo6gg7uiifocbslkvp3hv7kgpgetcctr6kk] inputs_to_check[89]: 240", + "[xfscvbmf5xe24dzns4sojux47uhkj3rx5escbpdnh74elnkufk4] inputs_to_check[90]: 241", + "[h22xhecdg6loiwavuwtlmkjqgxj23bujpayu7r7cnuwow22zy2e] inputs_to_check[91]: 242", + "[ffeweldbvparqtiiuqrj2k57nqdefczqg2x5n36j3cgfubpzl2r] inputs_to_check[92]: 243", + "[ccpnfy7cohyrspuggdpbc7wpz4ul2fjni56cul6rgpztmmgrmbz] inputs_to_check[93]: 244", + "[wdxh2vlxp5oip6lbdhdc5bur247q4qljosyhafat4nqrygspikw] inputs_to_check[94]: 245", + "[imw7rafsd3kl6dhscu76cm37kz7cnlrn4cz4y77hnvu6epvrakh] inputs_to_check[95]: 246", + "[s7byzhoacpzsci2aqrvogeb7f5f7egbzqw5uujgvswtjutgh33t] inputs_to_check[96]: 247", + "[usr2ovytp5xi5pvkziuuu7bpg7zfa44xckrw7mx42ad2sixrfjw] inputs_to_check[97]: 248", + "[spsub4jhn3s5o5rdysoeyv5mqczcmmn3t3hmuflioyvd4im4pfe] inputs_to_check[98]: 249", + "[7c6i6h6bfell5u33q6rcv25lpgmk4jah3uhjjx6bjevvjnshoim] inputs_to_check[99]: 250", + "[ah4sag2igwizdxkml6voaf342455hrlpr6cesdepe5njv6zahlq] inputs_to_check[100]: 251", + "[5svbus4u4wap6a3z767wrjlymc7g7qft4ugaae65e5t7tvvigpv] inputs_to_check[101]: 252", + "[6dwei2ltmufaindqw57by4jqhptur5xijexpjttzbqiw5xq7ufb] inputs_to_check[102]: 253", + "[r557yxhzgnvuaqxq2y2aisgxqacm3cndl3efydbr3l5u7t6vaao] inputs_to_check[103]: 254", + "[ouw6rzfmq6mznqvkp4ouhr3fzo3ljmtrqmyrm3pitapfmmfcq67] inputs_to_check[104]: 255", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inputs_to_check[105]: 256", + "[bmucc7bpdzrvbf5petlmybacuupvmc7agiupfu54h73xhqupvaf] inputs_to_check[106]: 257", + "[26uxlcwvg3bcy54iimat7oht3bxo7jvlwbqdppzk5zwkoxyv7cb] inputs_to_check[107]: 258", + "[6ficbgfpvxoz32hthootgscev5fx7mrus4lxsrrnugdtmomziuz] inputs_to_check[108]: 259", + "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[109]: 260", + "[677wque2vebz2rd32qmi2owsgvljzdjthgg7zhc67nb7s6z46xw] inputs_to_check[110]: 261", + "[m7xjumsvlwcemhzme2dqw2usksuuan5qqnmqjaetwxiar4s4j6c] inputs_to_check[111]: 262", + "[paeekzgtlydskvlpjv3e3j2zbrofnrddnfj3b6ronaopjyxkseo] inputs_to_check[112]: 263", + "[rwjfz3vqhvl3rixynv6iipvly4la2yaqouw2ax5qrgr43ekinaz] inputs_to_check[113]: 264", + 
"[3rcgr2jikn5dxbda5qrx57cu7lqsoqexxqcfzwsazgqojrjzsua] inputs_to_check[114]: 265", + "[uqxikezud5mf2aqkp7seqvyze7hpn2o5c2yez6uydmlvjn2eh5v] inputs_to_check[115]: 266", + "[wjcwolwbnxwp5mof62qvgpjq3ar5rzt6kkdytt6wev4udsfqe6x] inputs_to_check[116]: 267", + "[veogf6bsf6ikphzxxhjgpzn5jqx3g55t4aoyurw7pm76yiy7zt2] inputs_to_check[117]: 268", + "[vjdrwtc6q6dw6qossmadxe3ct3fx342xxf5ebrhb42tbtrjmtoj] inputs_to_check[118]: 269", + "[fzi2ibchn6t3srl4xj73y4mywe6m4to7ns3ffl2tsei46mebm5l] inputs_to_check[119]: 270", + "[54mteumeehrhr42ajb2mmomf5sssoeewc5jl6nqlnib5ohxb3qd] inputs_to_check[120]: 271", + "[yk2ib3hhazfcmbecutfzw356dveofwjm5aokurcon6brky4fv62] inputs_to_check[121]: 272", + "[qmgadyz6iluhzeytokynhwx3fiybdqurux3poi3kx7xdkpp24io] inputs_to_check[122]: 273", + "[32k7zywxkomlofwth2mo5yip7d4f63vdnnwqucsjvfiviwhzqwj] inputs_to_check[123]: 274", + "[ex5ejsi3yiu26ymqgxsfzxfongdoyfsvtn6wbfmjmqan26mywvv] inputs_to_check[124]: 275", + "[3r3pihjgebqvjx6yoh4q36k6bmloajxb2wp7mpovqfmbgvtihjx] inputs_to_check[125]: 276", + "[jcfhgvbhktjahasdkcg4j22c4iu5wgbqo2by6mvnhkizuyl7adt] inputs_to_check[126]: 277", + "[qysgpar3mwuhkwfmkdvwppvqn2rc3wkswoy4l5242blg3s6nomq] inputs_to_check[127]: 278", + "[sj5qpjzwfnrnrslghhva6z7fypbpicxjbczgdsscdg7ty6dloke] inputs_to_check[128]: 279", + "[dh6oykkvzpw4hh2l2kyq3n3oiaawqasgyps2bki6ouaqwr5o4c5] inputs_to_check[129]: 280", + "[bqqqhzw6zzkrdgeg4wed3ge2u7wrxxweyb7ikuugm2lg5bw2low] inputs_to_check[130]: 281", + "[lpgee42ktycd2ec7bvvfmts5czoojvy5rglm2fz4boqbzvem3mz] inputs_to_check[131]: 282", + "[xxcd2riuuqmc632el2www5z43brah2hzj66qz5c2bl4txi6tphi] inputs_to_check[132]: 283", + "[lzq356tk2daemd3eejrqwmxfuprmzobz2v54vhsfmppeq35midf] inputs_to_check[133]: 284", + "[jeqk32o5ugk777bosvm26wli4suonie2j7xeyvcnflm6sh2su4f] inputs_to_check[134]: 285", + "[3nrlzlfgqdttgmpwe6ae4donvgjkzv5xalpsx6dkyop7d5e7owg] inputs_to_check[135]: 286", + "[2cfsjfkfvrnfsi2dtyhpmzmogddssh6uxfsq3ydka2snuhaqy4s] inputs_to_check[136]: 287", + "[d27xrkheycncdo3uzfumqtruedyl7pv2ur7to3lkeg7cjfaawja] inputs_to_check[137]: 288", + "[o65ulls4ibkqdqeuckzqgselabavcbzln6kizmseggtkrra6k4k] inputs_to_check[138]: 289", + "[e5avxq5la5yhcl3jslzu2qsr4tcolx35t2ujwwjr7lsqbhx2gk3] inputs_to_check[139]: 290", + "[cbjwd3zv52u3h7bomxvmf6ynkx4wmtm6bqzzwkzlmyr2ict4kfn] inputs_to_check[140]: 291", + "[fpvvfys36hfg7uwq5l6ekyjnvf3tjpbf4d5cxo4webm7epzhpvt] inputs_to_check[141]: 292", + "[bhuauv3brrxmr45r7yueymn76n3bwlyfrcrqtsbuok4ipqa5d2q] inputs_to_check[142]: 293", + "[n5u6kpqzxtau4hisgec3wulumses6yh323wd6fnttpwm42i3j7x] inputs_to_check[143]: 294", + "[rtapjyb4o2hwk4hyf4ep7oeikdbv4zq2ni4dilcfjnjo4sgwzvz] inputs_to_check[144]: 295", + "[6aadk5hp6aqszjgpca65txkgn7cp4wttn7o6q4uv5br7qu4ubxq] inputs_to_check[145]: 296", + "[ptxf4kphvduiofe7xbem4isrkenfdki3oegb53qdm2jzkbn26ed] inputs_to_check[146]: 297", + "[5pxadwvblqbojkxsf7lbkowi52nvhflb3rx456ro3uostqlb7ky] inputs_to_check[147]: 298", + "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", + "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", + "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", + "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", + "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': 
'3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", + "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", + "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", + "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", + "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", + "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", + "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] 
inductor_config[cpp.threads]: -1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", + "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", + "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", + "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", + "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", + "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", + 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", + "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", + "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", + "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", + "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", + "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", + "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", + "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", + "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", + "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: 
True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", + "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", + "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", + "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", + "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", + "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", + "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", + "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", + "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", + "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", + "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", + "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", + "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", + "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", + "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", + 
"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", + "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", + "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", + "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", + "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", + "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", + "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", + 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", + "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", + "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", + "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", + "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", + "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", + "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", + "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", + "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", + "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", + "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", + "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess" + ] + }, + "ph": "i", + "pid": 0, + "s": "p" + } +V0806 13:56:22.387000 4107173 torch/_inductor/codecache.py:1326] {"artifact": {"name": "fx_graph_cache_hash", "encoding": "json"}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "f04a88277d7f7865bde907ed3d2b2c2e"} + 
{"key": "f2hzi4mmzauwdbyib6zmykorraxjbqftyvglo6f4mz2b36wljiti", "cache_state": "miss", "components": ["[i5hietdxt6dlkcrwbpsvei6udef6z3eec54zo7cpjzbybmgvi7b] gm: ()\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1, arg148_1, arg149_1, arg150_1, arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1, arg299_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, 
arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1, arg446_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1, arg593_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, 
arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1):\n _foreach_add = torch.ops.aten._foreach_add.Scalar([arg593_1, arg148_1, arg594_1, arg595_1, arg596_1, arg597_1, arg598_1, arg599_1, arg600_1, arg601_1, arg602_1, arg603_1, arg604_1, arg605_1, arg606_1, arg607_1, arg608_1, arg609_1, arg610_1, arg611_1, arg612_1, arg613_1, arg614_1, arg615_1, arg616_1, arg617_1, arg618_1, arg619_1, arg620_1, arg621_1, arg622_1, arg623_1, arg624_1, arg625_1, arg626_1, arg627_1, arg628_1, arg629_1, arg630_1, arg631_1, arg632_1, arg633_1, arg634_1, arg635_1, arg636_1, arg637_1, arg638_1, arg639_1, arg640_1, arg641_1, arg642_1, arg643_1, arg644_1, arg645_1, arg646_1, arg647_1, arg648_1, arg649_1, arg650_1, arg651_1, arg652_1, arg653_1, arg654_1, arg655_1, arg656_1, arg657_1, arg658_1, arg659_1, arg660_1, arg661_1, arg662_1, arg663_1, arg664_1, arg665_1, arg666_1, arg667_1, arg668_1, arg669_1, arg670_1, arg671_1, arg672_1, arg673_1, arg674_1, arg675_1, arg676_1, arg677_1, arg678_1, arg679_1, arg680_1, arg681_1, arg682_1, arg683_1, arg684_1, arg685_1, arg686_1, arg687_1, arg688_1, arg689_1, arg690_1, arg691_1, arg692_1, arg693_1, arg694_1, arg695_1, arg696_1, arg697_1, arg698_1, arg699_1, arg700_1, arg701_1, arg702_1, arg703_1, arg704_1, arg705_1, arg706_1, arg707_1, arg708_1, arg709_1, arg710_1, arg711_1, arg712_1, arg713_1, arg714_1, arg715_1, arg716_1, arg717_1, arg718_1, arg719_1, arg720_1, arg721_1, arg722_1, arg723_1, arg724_1, arg725_1, arg726_1, arg727_1, arg728_1, arg729_1, arg730_1, arg731_1, arg732_1, arg733_1, arg734_1, arg735_1, arg736_1, arg737_1, arg738_1, arg739_1], 1)\n getitem = _foreach_add[0]\n getitem_1 = _foreach_add[1]\n getitem_2 = _foreach_add[2]\n getitem_3 = _foreach_add[3]\n getitem_4 = _foreach_add[4]\n getitem_5 = _foreach_add[5]\n getitem_6 = _foreach_add[6]\n getitem_7 = _foreach_add[7]\n getitem_8 = _foreach_add[8]\n getitem_9 = _foreach_add[9]\n getitem_10 = _foreach_add[10]\n getitem_11 = _foreach_add[11]\n getitem_12 = _foreach_add[12]\n getitem_13 = _foreach_add[13]\n getitem_14 = _foreach_add[14]\n getitem_15 = _foreach_add[15]\n getitem_16 = _foreach_add[16]\n getitem_17 = _foreach_add[17]\n getitem_18 = _foreach_add[18]\n getitem_19 = _foreach_add[19]\n getitem_20 = _foreach_add[20]\n getitem_21 = _foreach_add[21]\n getitem_22 = _foreach_add[22]\n getitem_23 = _foreach_add[23]\n getitem_24 = _foreach_add[24]\n getitem_25 = _foreach_add[25]\n getitem_26 = _foreach_add[26]\n getitem_27 = _foreach_add[27]\n getitem_28 = _foreach_add[28]\n getitem_29 = _foreach_add[29]\n getitem_30 = _foreach_add[30]\n getitem_31 = _foreach_add[31]\n getitem_32 = _foreach_add[32]\n getitem_33 = _foreach_add[33]\n getitem_34 = _foreach_add[34]\n getitem_35 = _foreach_add[35]\n getitem_36 = _foreach_add[36]\n getitem_37 = _foreach_add[37]\n getitem_38 = _foreach_add[38]\n getitem_39 = _foreach_add[39]\n getitem_40 = _foreach_add[40]\n getitem_41 = _foreach_add[41]\n getitem_42 = _foreach_add[42]\n getitem_43 = _foreach_add[43]\n getitem_44 = _foreach_add[44]\n getitem_45 = _foreach_add[45]\n getitem_46 = _foreach_add[46]\n getitem_47 = _foreach_add[47]\n getitem_48 = _foreach_add[48]\n getitem_49 = _foreach_add[49]\n getitem_50 = _foreach_add[50]\n getitem_51 = 
_foreach_add[51]\n getitem_52 = _foreach_add[52]\n getitem_53 = _foreach_add[53]\n getitem_54 = _foreach_add[54]\n getitem_55 = _foreach_add[55]\n getitem_56 = _foreach_add[56]\n getitem_57 = _foreach_add[57]\n getitem_58 = _foreach_add[58]\n getitem_59 = _foreach_add[59]\n getitem_60 = _foreach_add[60]\n getitem_61 = _foreach_add[61]\n getitem_62 = _foreach_add[62]\n getitem_63 = _foreach_add[63]\n getitem_64 = _foreach_add[64]\n getitem_65 = _foreach_add[65]\n getitem_66 = _foreach_add[66]\n getitem_67 = _foreach_add[67]\n getitem_68 = _foreach_add[68]\n getitem_69 = _foreach_add[69]\n getitem_70 = _foreach_add[70]\n getitem_71 = _foreach_add[71]\n getitem_72 = _foreach_add[72]\n getitem_73 = _foreach_add[73]\n getitem_74 = _foreach_add[74]\n getitem_75 = _foreach_add[75]\n getitem_76 = _foreach_add[76]\n getitem_77 = _foreach_add[77]\n getitem_78 = _foreach_add[78]\n getitem_79 = _foreach_add[79]\n getitem_80 = _foreach_add[80]\n getitem_81 = _foreach_add[81]\n getitem_82 = _foreach_add[82]\n getitem_83 = _foreach_add[83]\n getitem_84 = _foreach_add[84]\n getitem_85 = _foreach_add[85]\n getitem_86 = _foreach_add[86]\n getitem_87 = _foreach_add[87]\n getitem_88 = _foreach_add[88]\n getitem_89 = _foreach_add[89]\n getitem_90 = _foreach_add[90]\n getitem_91 = _foreach_add[91]\n getitem_92 = _foreach_add[92]\n getitem_93 = _foreach_add[93]\n getitem_94 = _foreach_add[94]\n getitem_95 = _foreach_add[95]\n getitem_96 = _foreach_add[96]\n getitem_97 = _foreach_add[97]\n getitem_98 = _foreach_add[98]\n getitem_99 = _foreach_add[99]\n getitem_100 = _foreach_add[100]\n getitem_101 = _foreach_add[101]\n getitem_102 = _foreach_add[102]\n getitem_103 = _foreach_add[103]\n getitem_104 = _foreach_add[104]\n getitem_105 = _foreach_add[105]\n getitem_106 = _foreach_add[106]\n getitem_107 = _foreach_add[107]\n getitem_108 = _foreach_add[108]\n getitem_109 = _foreach_add[109]\n getitem_110 = _foreach_add[110]\n getitem_111 = _foreach_add[111]\n getitem_112 = _foreach_add[112]\n getitem_113 = _foreach_add[113]\n getitem_114 = _foreach_add[114]\n getitem_115 = _foreach_add[115]\n getitem_116 = _foreach_add[116]\n getitem_117 = _foreach_add[117]\n getitem_118 = _foreach_add[118]\n getitem_119 = _foreach_add[119]\n getitem_120 = _foreach_add[120]\n getitem_121 = _foreach_add[121]\n getitem_122 = _foreach_add[122]\n getitem_123 = _foreach_add[123]\n getitem_124 = _foreach_add[124]\n getitem_125 = _foreach_add[125]\n getitem_126 = _foreach_add[126]\n getitem_127 = _foreach_add[127]\n getitem_128 = _foreach_add[128]\n getitem_129 = _foreach_add[129]\n getitem_130 = _foreach_add[130]\n getitem_131 = _foreach_add[131]\n getitem_132 = _foreach_add[132]\n getitem_133 = _foreach_add[133]\n getitem_134 = _foreach_add[134]\n getitem_135 = _foreach_add[135]\n getitem_136 = _foreach_add[136]\n getitem_137 = _foreach_add[137]\n getitem_138 = _foreach_add[138]\n getitem_139 = _foreach_add[139]\n getitem_140 = _foreach_add[140]\n getitem_141 = _foreach_add[141]\n getitem_142 = _foreach_add[142]\n getitem_143 = _foreach_add[143]\n getitem_144 = _foreach_add[144]\n getitem_145 = _foreach_add[145]\n getitem_146 = _foreach_add[146]\n getitem_147 = _foreach_add[147]; _foreach_add = None\n _foreach_sub = torch.ops.aten._foreach_sub.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, 
arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1])\n getitem_148 = _foreach_sub[0]\n getitem_149 = _foreach_sub[1]\n getitem_150 = _foreach_sub[2]\n getitem_151 = _foreach_sub[3]\n getitem_152 = _foreach_sub[4]\n getitem_153 = _foreach_sub[5]\n getitem_154 = _foreach_sub[6]\n getitem_155 = _foreach_sub[7]\n getitem_156 = _foreach_sub[8]\n getitem_157 = _foreach_sub[9]\n getitem_158 = _foreach_sub[10]\n getitem_159 = _foreach_sub[11]\n getitem_160 = _foreach_sub[12]\n getitem_161 = _foreach_sub[13]\n getitem_162 = _foreach_sub[14]\n getitem_163 = _foreach_sub[15]\n getitem_164 = _foreach_sub[16]\n getitem_165 = _foreach_sub[17]\n getitem_166 = _foreach_sub[18]\n getitem_167 = _foreach_sub[19]\n getitem_168 = _foreach_sub[20]\n getitem_169 = _foreach_sub[21]\n getitem_170 = _foreach_sub[22]\n getitem_171 = _foreach_sub[23]\n getitem_172 = _foreach_sub[24]\n getitem_173 = _foreach_sub[25]\n getitem_174 = 
_foreach_sub[26]\n getitem_175 = _foreach_sub[27]\n getitem_176 = _foreach_sub[28]\n getitem_177 = _foreach_sub[29]\n getitem_178 = _foreach_sub[30]\n getitem_179 = _foreach_sub[31]\n getitem_180 = _foreach_sub[32]\n getitem_181 = _foreach_sub[33]\n getitem_182 = _foreach_sub[34]\n getitem_183 = _foreach_sub[35]\n getitem_184 = _foreach_sub[36]\n getitem_185 = _foreach_sub[37]\n getitem_186 = _foreach_sub[38]\n getitem_187 = _foreach_sub[39]\n getitem_188 = _foreach_sub[40]\n getitem_189 = _foreach_sub[41]\n getitem_190 = _foreach_sub[42]\n getitem_191 = _foreach_sub[43]\n getitem_192 = _foreach_sub[44]\n getitem_193 = _foreach_sub[45]\n getitem_194 = _foreach_sub[46]\n getitem_195 = _foreach_sub[47]\n getitem_196 = _foreach_sub[48]\n getitem_197 = _foreach_sub[49]\n getitem_198 = _foreach_sub[50]\n getitem_199 = _foreach_sub[51]\n getitem_200 = _foreach_sub[52]\n getitem_201 = _foreach_sub[53]\n getitem_202 = _foreach_sub[54]\n getitem_203 = _foreach_sub[55]\n getitem_204 = _foreach_sub[56]\n getitem_205 = _foreach_sub[57]\n getitem_206 = _foreach_sub[58]\n getitem_207 = _foreach_sub[59]\n getitem_208 = _foreach_sub[60]\n getitem_209 = _foreach_sub[61]\n getitem_210 = _foreach_sub[62]\n getitem_211 = _foreach_sub[63]\n getitem_212 = _foreach_sub[64]\n getitem_213 = _foreach_sub[65]\n getitem_214 = _foreach_sub[66]\n getitem_215 = _foreach_sub[67]\n getitem_216 = _foreach_sub[68]\n getitem_217 = _foreach_sub[69]\n getitem_218 = _foreach_sub[70]\n getitem_219 = _foreach_sub[71]\n getitem_220 = _foreach_sub[72]\n getitem_221 = _foreach_sub[73]\n getitem_222 = _foreach_sub[74]\n getitem_223 = _foreach_sub[75]\n getitem_224 = _foreach_sub[76]\n getitem_225 = _foreach_sub[77]\n getitem_226 = _foreach_sub[78]\n getitem_227 = _foreach_sub[79]\n getitem_228 = _foreach_sub[80]\n getitem_229 = _foreach_sub[81]\n getitem_230 = _foreach_sub[82]\n getitem_231 = _foreach_sub[83]\n getitem_232 = _foreach_sub[84]\n getitem_233 = _foreach_sub[85]\n getitem_234 = _foreach_sub[86]\n getitem_235 = _foreach_sub[87]\n getitem_236 = _foreach_sub[88]\n getitem_237 = _foreach_sub[89]\n getitem_238 = _foreach_sub[90]\n getitem_239 = _foreach_sub[91]\n getitem_240 = _foreach_sub[92]\n getitem_241 = _foreach_sub[93]\n getitem_242 = _foreach_sub[94]\n getitem_243 = _foreach_sub[95]\n getitem_244 = _foreach_sub[96]\n getitem_245 = _foreach_sub[97]\n getitem_246 = _foreach_sub[98]\n getitem_247 = _foreach_sub[99]\n getitem_248 = _foreach_sub[100]\n getitem_249 = _foreach_sub[101]\n getitem_250 = _foreach_sub[102]\n getitem_251 = _foreach_sub[103]\n getitem_252 = _foreach_sub[104]\n getitem_253 = _foreach_sub[105]\n getitem_254 = _foreach_sub[106]\n getitem_255 = _foreach_sub[107]\n getitem_256 = _foreach_sub[108]\n getitem_257 = _foreach_sub[109]\n getitem_258 = _foreach_sub[110]\n getitem_259 = _foreach_sub[111]\n getitem_260 = _foreach_sub[112]\n getitem_261 = _foreach_sub[113]\n getitem_262 = _foreach_sub[114]\n getitem_263 = _foreach_sub[115]\n getitem_264 = _foreach_sub[116]\n getitem_265 = _foreach_sub[117]\n getitem_266 = _foreach_sub[118]\n getitem_267 = _foreach_sub[119]\n getitem_268 = _foreach_sub[120]\n getitem_269 = _foreach_sub[121]\n getitem_270 = _foreach_sub[122]\n getitem_271 = _foreach_sub[123]\n getitem_272 = _foreach_sub[124]\n getitem_273 = _foreach_sub[125]\n getitem_274 = _foreach_sub[126]\n getitem_275 = _foreach_sub[127]\n getitem_276 = _foreach_sub[128]\n getitem_277 = _foreach_sub[129]\n getitem_278 = _foreach_sub[130]\n getitem_279 = _foreach_sub[131]\n getitem_280 = _foreach_sub[132]\n 
getitem_281 = _foreach_sub[133]\n getitem_282 = _foreach_sub[134]\n getitem_283 = _foreach_sub[135]\n getitem_284 = _foreach_sub[136]\n getitem_285 = _foreach_sub[137]\n getitem_286 = _foreach_sub[138]\n getitem_287 = _foreach_sub[139]\n getitem_288 = _foreach_sub[140]\n getitem_289 = _foreach_sub[141]\n getitem_290 = _foreach_sub[142]\n getitem_291 = _foreach_sub[143]\n getitem_292 = _foreach_sub[144]\n getitem_293 = _foreach_sub[145]\n getitem_294 = _foreach_sub[146]\n getitem_295 = _foreach_sub[147]; _foreach_sub = None\n _foreach_mul = torch.ops.aten._foreach_mul.Scalar([getitem_148, getitem_149, getitem_150, getitem_151, getitem_152, getitem_153, getitem_154, getitem_155, getitem_156, getitem_157, getitem_158, getitem_159, getitem_160, getitem_161, getitem_162, getitem_163, getitem_164, getitem_165, getitem_166, getitem_167, getitem_168, getitem_169, getitem_170, getitem_171, getitem_172, getitem_173, getitem_174, getitem_175, getitem_176, getitem_177, getitem_178, getitem_179, getitem_180, getitem_181, getitem_182, getitem_183, getitem_184, getitem_185, getitem_186, getitem_187, getitem_188, getitem_189, getitem_190, getitem_191, getitem_192, getitem_193, getitem_194, getitem_195, getitem_196, getitem_197, getitem_198, getitem_199, getitem_200, getitem_201, getitem_202, getitem_203, getitem_204, getitem_205, getitem_206, getitem_207, getitem_208, getitem_209, getitem_210, getitem_211, getitem_212, getitem_213, getitem_214, getitem_215, getitem_216, getitem_217, getitem_218, getitem_219, getitem_220, getitem_221, getitem_222, getitem_223, getitem_224, getitem_225, getitem_226, getitem_227, getitem_228, getitem_229, getitem_230, getitem_231, getitem_232, getitem_233, getitem_234, getitem_235, getitem_236, getitem_237, getitem_238, getitem_239, getitem_240, getitem_241, getitem_242, getitem_243, getitem_244, getitem_245, getitem_246, getitem_247, getitem_248, getitem_249, getitem_250, getitem_251, getitem_252, getitem_253, getitem_254, getitem_255, getitem_256, getitem_257, getitem_258, getitem_259, getitem_260, getitem_261, getitem_262, getitem_263, getitem_264, getitem_265, getitem_266, getitem_267, getitem_268, getitem_269, getitem_270, getitem_271, getitem_272, getitem_273, getitem_274, getitem_275, getitem_276, getitem_277, getitem_278, getitem_279, getitem_280, getitem_281, getitem_282, getitem_283, getitem_284, getitem_285, getitem_286, getitem_287, getitem_288, getitem_289, getitem_290, getitem_291, getitem_292, getitem_293, getitem_294, getitem_295], 0.09999999999999998); getitem_148 = getitem_149 = getitem_150 = getitem_151 = getitem_152 = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = getitem_158 = getitem_159 = getitem_160 = getitem_161 = getitem_162 = getitem_163 = getitem_164 = getitem_165 = getitem_166 = getitem_167 = getitem_168 = getitem_169 = getitem_170 = getitem_171 = getitem_172 = getitem_173 = getitem_174 = getitem_175 = getitem_176 = getitem_177 = getitem_178 = getitem_179 = getitem_180 = getitem_181 = getitem_182 = getitem_183 = getitem_184 = getitem_185 = getitem_186 = getitem_187 = getitem_188 = getitem_189 = getitem_190 = getitem_191 = getitem_192 = getitem_193 = getitem_194 = getitem_195 = getitem_196 = getitem_197 = getitem_198 = getitem_199 = getitem_200 = getitem_201 = getitem_202 = getitem_203 = getitem_204 = getitem_205 = getitem_206 = getitem_207 = getitem_208 = getitem_209 = getitem_210 = getitem_211 = getitem_212 = getitem_213 = getitem_214 = getitem_215 = getitem_216 = getitem_217 = getitem_218 = getitem_219 = getitem_220 = 
getitem_221 = getitem_222 = getitem_223 = getitem_224 = getitem_225 = getitem_226 = getitem_227 = getitem_228 = getitem_229 = getitem_230 = getitem_231 = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = getitem_237 = getitem_238 = getitem_239 = getitem_240 = getitem_241 = getitem_242 = getitem_243 = getitem_244 = getitem_245 = getitem_246 = getitem_247 = getitem_248 = getitem_249 = getitem_250 = getitem_251 = getitem_252 = getitem_253 = getitem_254 = getitem_255 = getitem_256 = getitem_257 = getitem_258 = getitem_259 = getitem_260 = getitem_261 = getitem_262 = getitem_263 = getitem_264 = getitem_265 = getitem_266 = getitem_267 = getitem_268 = getitem_269 = getitem_270 = getitem_271 = getitem_272 = getitem_273 = getitem_274 = getitem_275 = getitem_276 = getitem_277 = getitem_278 = getitem_279 = getitem_280 = getitem_281 = getitem_282 = getitem_283 = getitem_284 = getitem_285 = getitem_286 = getitem_287 = getitem_288 = getitem_289 = getitem_290 = getitem_291 = getitem_292 = getitem_293 = getitem_294 = getitem_295 = None\n getitem_296 = _foreach_mul[0]\n getitem_297 = _foreach_mul[1]\n getitem_298 = _foreach_mul[2]\n getitem_299 = _foreach_mul[3]\n getitem_300 = _foreach_mul[4]\n getitem_301 = _foreach_mul[5]\n getitem_302 = _foreach_mul[6]\n getitem_303 = _foreach_mul[7]\n getitem_304 = _foreach_mul[8]\n getitem_305 = _foreach_mul[9]\n getitem_306 = _foreach_mul[10]\n getitem_307 = _foreach_mul[11]\n getitem_308 = _foreach_mul[12]\n getitem_309 = _foreach_mul[13]\n getitem_310 = _foreach_mul[14]\n getitem_311 = _foreach_mul[15]\n getitem_312 = _foreach_mul[16]\n getitem_313 = _foreach_mul[17]\n getitem_314 = _foreach_mul[18]\n getitem_315 = _foreach_mul[19]\n getitem_316 = _foreach_mul[20]\n getitem_317 = _foreach_mul[21]\n getitem_318 = _foreach_mul[22]\n getitem_319 = _foreach_mul[23]\n getitem_320 = _foreach_mul[24]\n getitem_321 = _foreach_mul[25]\n getitem_322 = _foreach_mul[26]\n getitem_323 = _foreach_mul[27]\n getitem_324 = _foreach_mul[28]\n getitem_325 = _foreach_mul[29]\n getitem_326 = _foreach_mul[30]\n getitem_327 = _foreach_mul[31]\n getitem_328 = _foreach_mul[32]\n getitem_329 = _foreach_mul[33]\n getitem_330 = _foreach_mul[34]\n getitem_331 = _foreach_mul[35]\n getitem_332 = _foreach_mul[36]\n getitem_333 = _foreach_mul[37]\n getitem_334 = _foreach_mul[38]\n getitem_335 = _foreach_mul[39]\n getitem_336 = _foreach_mul[40]\n getitem_337 = _foreach_mul[41]\n getitem_338 = _foreach_mul[42]\n getitem_339 = _foreach_mul[43]\n getitem_340 = _foreach_mul[44]\n getitem_341 = _foreach_mul[45]\n getitem_342 = _foreach_mul[46]\n getitem_343 = _foreach_mul[47]\n getitem_344 = _foreach_mul[48]\n getitem_345 = _foreach_mul[49]\n getitem_346 = _foreach_mul[50]\n getitem_347 = _foreach_mul[51]\n getitem_348 = _foreach_mul[52]\n getitem_349 = _foreach_mul[53]\n getitem_350 = _foreach_mul[54]\n getitem_351 = _foreach_mul[55]\n getitem_352 = _foreach_mul[56]\n getitem_353 = _foreach_mul[57]\n getitem_354 = _foreach_mul[58]\n getitem_355 = _foreach_mul[59]\n getitem_356 = _foreach_mul[60]\n getitem_357 = _foreach_mul[61]\n getitem_358 = _foreach_mul[62]\n getitem_359 = _foreach_mul[63]\n getitem_360 = _foreach_mul[64]\n getitem_361 = _foreach_mul[65]\n getitem_362 = _foreach_mul[66]\n getitem_363 = _foreach_mul[67]\n getitem_364 = _foreach_mul[68]\n getitem_365 = _foreach_mul[69]\n getitem_366 = _foreach_mul[70]\n getitem_367 = _foreach_mul[71]\n getitem_368 = _foreach_mul[72]\n getitem_369 = _foreach_mul[73]\n getitem_370 = _foreach_mul[74]\n getitem_371 = _foreach_mul[75]\n 
getitem_372 = _foreach_mul[76]\n getitem_373 = _foreach_mul[77]\n getitem_374 = _foreach_mul[78]\n getitem_375 = _foreach_mul[79]\n getitem_376 = _foreach_mul[80]\n getitem_377 = _foreach_mul[81]\n getitem_378 = _foreach_mul[82]\n getitem_379 = _foreach_mul[83]\n getitem_380 = _foreach_mul[84]\n getitem_381 = _foreach_mul[85]\n getitem_382 = _foreach_mul[86]\n getitem_383 = _foreach_mul[87]\n getitem_384 = _foreach_mul[88]\n getitem_385 = _foreach_mul[89]\n getitem_386 = _foreach_mul[90]\n getitem_387 = _foreach_mul[91]\n getitem_388 = _foreach_mul[92]\n getitem_389 = _foreach_mul[93]\n getitem_390 = _foreach_mul[94]\n getitem_391 = _foreach_mul[95]\n getitem_392 = _foreach_mul[96]\n getitem_393 = _foreach_mul[97]\n getitem_394 = _foreach_mul[98]\n getitem_395 = _foreach_mul[99]\n getitem_396 = _foreach_mul[100]\n getitem_397 = _foreach_mul[101]\n getitem_398 = _foreach_mul[102]\n getitem_399 = _foreach_mul[103]\n getitem_400 = _foreach_mul[104]\n getitem_401 = _foreach_mul[105]\n getitem_402 = _foreach_mul[106]\n getitem_403 = _foreach_mul[107]\n getitem_404 = _foreach_mul[108]\n getitem_405 = _foreach_mul[109]\n getitem_406 = _foreach_mul[110]\n getitem_407 = _foreach_mul[111]\n getitem_408 = _foreach_mul[112]\n getitem_409 = _foreach_mul[113]\n getitem_410 = _foreach_mul[114]\n getitem_411 = _foreach_mul[115]\n getitem_412 = _foreach_mul[116]\n getitem_413 = _foreach_mul[117]\n getitem_414 = _foreach_mul[118]\n getitem_415 = _foreach_mul[119]\n getitem_416 = _foreach_mul[120]\n getitem_417 = _foreach_mul[121]\n getitem_418 = _foreach_mul[122]\n getitem_419 = _foreach_mul[123]\n getitem_420 = _foreach_mul[124]\n getitem_421 = _foreach_mul[125]\n getitem_422 = _foreach_mul[126]\n getitem_423 = _foreach_mul[127]\n getitem_424 = _foreach_mul[128]\n getitem_425 = _foreach_mul[129]\n getitem_426 = _foreach_mul[130]\n getitem_427 = _foreach_mul[131]\n getitem_428 = _foreach_mul[132]\n getitem_429 = _foreach_mul[133]\n getitem_430 = _foreach_mul[134]\n getitem_431 = _foreach_mul[135]\n getitem_432 = _foreach_mul[136]\n getitem_433 = _foreach_mul[137]\n getitem_434 = _foreach_mul[138]\n getitem_435 = _foreach_mul[139]\n getitem_436 = _foreach_mul[140]\n getitem_437 = _foreach_mul[141]\n getitem_438 = _foreach_mul[142]\n getitem_439 = _foreach_mul[143]\n getitem_440 = _foreach_mul[144]\n getitem_441 = _foreach_mul[145]\n getitem_442 = _foreach_mul[146]\n getitem_443 = _foreach_mul[147]; _foreach_mul = None\n _foreach_add_1 = torch.ops.aten._foreach_add.List([arg299_1, arg149_1, arg300_1, arg301_1, arg302_1, arg303_1, arg304_1, arg305_1, arg306_1, arg307_1, arg308_1, arg309_1, arg310_1, arg311_1, arg312_1, arg313_1, arg314_1, arg315_1, arg316_1, arg317_1, arg318_1, arg319_1, arg320_1, arg321_1, arg322_1, arg323_1, arg324_1, arg325_1, arg326_1, arg327_1, arg328_1, arg329_1, arg330_1, arg331_1, arg332_1, arg333_1, arg334_1, arg335_1, arg336_1, arg337_1, arg338_1, arg339_1, arg340_1, arg341_1, arg342_1, arg343_1, arg344_1, arg345_1, arg346_1, arg347_1, arg348_1, arg349_1, arg350_1, arg351_1, arg352_1, arg353_1, arg354_1, arg355_1, arg356_1, arg357_1, arg358_1, arg359_1, arg360_1, arg361_1, arg362_1, arg363_1, arg364_1, arg365_1, arg366_1, arg367_1, arg368_1, arg369_1, arg370_1, arg371_1, arg372_1, arg373_1, arg374_1, arg375_1, arg376_1, arg377_1, arg378_1, arg379_1, arg380_1, arg381_1, arg382_1, arg383_1, arg384_1, arg385_1, arg386_1, arg387_1, arg388_1, arg389_1, arg390_1, arg391_1, arg392_1, arg393_1, arg394_1, arg395_1, arg396_1, arg397_1, arg398_1, arg399_1, arg400_1, arg401_1, arg402_1, 
arg403_1, arg404_1, arg405_1, arg406_1, arg407_1, arg408_1, arg409_1, arg410_1, arg411_1, arg412_1, arg413_1, arg414_1, arg415_1, arg416_1, arg417_1, arg418_1, arg419_1, arg420_1, arg421_1, arg422_1, arg423_1, arg424_1, arg425_1, arg426_1, arg427_1, arg428_1, arg429_1, arg430_1, arg431_1, arg432_1, arg433_1, arg434_1, arg435_1, arg436_1, arg437_1, arg438_1, arg439_1, arg440_1, arg441_1, arg442_1, arg443_1, arg444_1, arg445_1], [getitem_296, getitem_297, getitem_298, getitem_299, getitem_300, getitem_301, getitem_302, getitem_303, getitem_304, getitem_305, getitem_306, getitem_307, getitem_308, getitem_309, getitem_310, getitem_311, getitem_312, getitem_313, getitem_314, getitem_315, getitem_316, getitem_317, getitem_318, getitem_319, getitem_320, getitem_321, getitem_322, getitem_323, getitem_324, getitem_325, getitem_326, getitem_327, getitem_328, getitem_329, getitem_330, getitem_331, getitem_332, getitem_333, getitem_334, getitem_335, getitem_336, getitem_337, getitem_338, getitem_339, getitem_340, getitem_341, getitem_342, getitem_343, getitem_344, getitem_345, getitem_346, getitem_347, getitem_348, getitem_349, getitem_350, getitem_351, getitem_352, getitem_353, getitem_354, getitem_355, getitem_356, getitem_357, getitem_358, getitem_359, getitem_360, getitem_361, getitem_362, getitem_363, getitem_364, getitem_365, getitem_366, getitem_367, getitem_368, getitem_369, getitem_370, getitem_371, getitem_372, getitem_373, getitem_374, getitem_375, getitem_376, getitem_377, getitem_378, getitem_379, getitem_380, getitem_381, getitem_382, getitem_383, getitem_384, getitem_385, getitem_386, getitem_387, getitem_388, getitem_389, getitem_390, getitem_391, getitem_392, getitem_393, getitem_394, getitem_395, getitem_396, getitem_397, getitem_398, getitem_399, getitem_400, getitem_401, getitem_402, getitem_403, getitem_404, getitem_405, getitem_406, getitem_407, getitem_408, getitem_409, getitem_410, getitem_411, getitem_412, getitem_413, getitem_414, getitem_415, getitem_416, getitem_417, getitem_418, getitem_419, getitem_420, getitem_421, getitem_422, getitem_423, getitem_424, getitem_425, getitem_426, getitem_427, getitem_428, getitem_429, getitem_430, getitem_431, getitem_432, getitem_433, getitem_434, getitem_435, getitem_436, getitem_437, getitem_438, getitem_439, getitem_440, getitem_441, getitem_442, getitem_443]); getitem_296 = getitem_297 = getitem_298 = getitem_299 = getitem_300 = getitem_301 = getitem_302 = getitem_303 = getitem_304 = getitem_305 = getitem_306 = getitem_307 = getitem_308 = getitem_309 = getitem_310 = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = getitem_316 = getitem_317 = getitem_318 = getitem_319 = getitem_320 = getitem_321 = getitem_322 = getitem_323 = getitem_324 = getitem_325 = getitem_326 = getitem_327 = getitem_328 = getitem_329 = getitem_330 = getitem_331 = getitem_332 = getitem_333 = getitem_334 = getitem_335 = getitem_336 = getitem_337 = getitem_338 = getitem_339 = getitem_340 = getitem_341 = getitem_342 = getitem_343 = getitem_344 = getitem_345 = getitem_346 = getitem_347 = getitem_348 = getitem_349 = getitem_350 = getitem_351 = getitem_352 = getitem_353 = getitem_354 = getitem_355 = getitem_356 = getitem_357 = getitem_358 = getitem_359 = getitem_360 = getitem_361 = getitem_362 = getitem_363 = getitem_364 = getitem_365 = getitem_366 = getitem_367 = getitem_368 = getitem_369 = getitem_370 = getitem_371 = getitem_372 = getitem_373 = getitem_374 = getitem_375 = getitem_376 = getitem_377 = getitem_378 = getitem_379 = getitem_380 = 
getitem_381 = getitem_382 = getitem_383 = getitem_384 = getitem_385 = getitem_386 = getitem_387 = getitem_388 = getitem_389 = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = getitem_395 = getitem_396 = getitem_397 = getitem_398 = getitem_399 = getitem_400 = getitem_401 = getitem_402 = getitem_403 = getitem_404 = getitem_405 = getitem_406 = getitem_407 = getitem_408 = getitem_409 = getitem_410 = getitem_411 = getitem_412 = getitem_413 = getitem_414 = getitem_415 = getitem_416 = getitem_417 = getitem_418 = getitem_419 = getitem_420 = getitem_421 = getitem_422 = getitem_423 = getitem_424 = getitem_425 = getitem_426 = getitem_427 = getitem_428 = getitem_429 = getitem_430 = getitem_431 = getitem_432 = getitem_433 = getitem_434 = getitem_435 = getitem_436 = getitem_437 = getitem_438 = getitem_439 = getitem_440 = getitem_441 = getitem_442 = getitem_443 = None\n getitem_444 = _foreach_add_1[0]\n getitem_445 = _foreach_add_1[1]\n getitem_446 = _foreach_add_1[2]\n getitem_447 = _foreach_add_1[3]\n getitem_448 = _foreach_add_1[4]\n getitem_449 = _foreach_add_1[5]\n getitem_450 = _foreach_add_1[6]\n getitem_451 = _foreach_add_1[7]\n getitem_452 = _foreach_add_1[8]\n getitem_453 = _foreach_add_1[9]\n getitem_454 = _foreach_add_1[10]\n getitem_455 = _foreach_add_1[11]\n getitem_456 = _foreach_add_1[12]\n getitem_457 = _foreach_add_1[13]\n getitem_458 = _foreach_add_1[14]\n getitem_459 = _foreach_add_1[15]\n getitem_460 = _foreach_add_1[16]\n getitem_461 = _foreach_add_1[17]\n getitem_462 = _foreach_add_1[18]\n getitem_463 = _foreach_add_1[19]\n getitem_464 = _foreach_add_1[20]\n getitem_465 = _foreach_add_1[21]\n getitem_466 = _foreach_add_1[22]\n getitem_467 = _foreach_add_1[23]\n getitem_468 = _foreach_add_1[24]\n getitem_469 = _foreach_add_1[25]\n getitem_470 = _foreach_add_1[26]\n getitem_471 = _foreach_add_1[27]\n getitem_472 = _foreach_add_1[28]\n getitem_473 = _foreach_add_1[29]\n getitem_474 = _foreach_add_1[30]\n getitem_475 = _foreach_add_1[31]\n getitem_476 = _foreach_add_1[32]\n getitem_477 = _foreach_add_1[33]\n getitem_478 = _foreach_add_1[34]\n getitem_479 = _foreach_add_1[35]\n getitem_480 = _foreach_add_1[36]\n getitem_481 = _foreach_add_1[37]\n getitem_482 = _foreach_add_1[38]\n getitem_483 = _foreach_add_1[39]\n getitem_484 = _foreach_add_1[40]\n getitem_485 = _foreach_add_1[41]\n getitem_486 = _foreach_add_1[42]\n getitem_487 = _foreach_add_1[43]\n getitem_488 = _foreach_add_1[44]\n getitem_489 = _foreach_add_1[45]\n getitem_490 = _foreach_add_1[46]\n getitem_491 = _foreach_add_1[47]\n getitem_492 = _foreach_add_1[48]\n getitem_493 = _foreach_add_1[49]\n getitem_494 = _foreach_add_1[50]\n getitem_495 = _foreach_add_1[51]\n getitem_496 = _foreach_add_1[52]\n getitem_497 = _foreach_add_1[53]\n getitem_498 = _foreach_add_1[54]\n getitem_499 = _foreach_add_1[55]\n getitem_500 = _foreach_add_1[56]\n getitem_501 = _foreach_add_1[57]\n getitem_502 = _foreach_add_1[58]\n getitem_503 = _foreach_add_1[59]\n getitem_504 = _foreach_add_1[60]\n getitem_505 = _foreach_add_1[61]\n getitem_506 = _foreach_add_1[62]\n getitem_507 = _foreach_add_1[63]\n getitem_508 = _foreach_add_1[64]\n getitem_509 = _foreach_add_1[65]\n getitem_510 = _foreach_add_1[66]\n getitem_511 = _foreach_add_1[67]\n getitem_512 = _foreach_add_1[68]\n getitem_513 = _foreach_add_1[69]\n getitem_514 = _foreach_add_1[70]\n getitem_515 = _foreach_add_1[71]\n getitem_516 = _foreach_add_1[72]\n getitem_517 = _foreach_add_1[73]\n getitem_518 = _foreach_add_1[74]\n getitem_519 = _foreach_add_1[75]\n getitem_520 = 
_foreach_add_1[76]\n getitem_521 = _foreach_add_1[77]\n getitem_522 = _foreach_add_1[78]\n getitem_523 = _foreach_add_1[79]\n getitem_524 = _foreach_add_1[80]\n getitem_525 = _foreach_add_1[81]\n getitem_526 = _foreach_add_1[82]\n getitem_527 = _foreach_add_1[83]\n getitem_528 = _foreach_add_1[84]\n getitem_529 = _foreach_add_1[85]\n getitem_530 = _foreach_add_1[86]\n getitem_531 = _foreach_add_1[87]\n getitem_532 = _foreach_add_1[88]\n getitem_533 = _foreach_add_1[89]\n getitem_534 = _foreach_add_1[90]\n getitem_535 = _foreach_add_1[91]\n getitem_536 = _foreach_add_1[92]\n getitem_537 = _foreach_add_1[93]\n getitem_538 = _foreach_add_1[94]\n getitem_539 = _foreach_add_1[95]\n getitem_540 = _foreach_add_1[96]\n getitem_541 = _foreach_add_1[97]\n getitem_542 = _foreach_add_1[98]\n getitem_543 = _foreach_add_1[99]\n getitem_544 = _foreach_add_1[100]\n getitem_545 = _foreach_add_1[101]\n getitem_546 = _foreach_add_1[102]\n getitem_547 = _foreach_add_1[103]\n getitem_548 = _foreach_add_1[104]\n getitem_549 = _foreach_add_1[105]\n getitem_550 = _foreach_add_1[106]\n getitem_551 = _foreach_add_1[107]\n getitem_552 = _foreach_add_1[108]\n getitem_553 = _foreach_add_1[109]\n getitem_554 = _foreach_add_1[110]\n getitem_555 = _foreach_add_1[111]\n getitem_556 = _foreach_add_1[112]\n getitem_557 = _foreach_add_1[113]\n getitem_558 = _foreach_add_1[114]\n getitem_559 = _foreach_add_1[115]\n getitem_560 = _foreach_add_1[116]\n getitem_561 = _foreach_add_1[117]\n getitem_562 = _foreach_add_1[118]\n getitem_563 = _foreach_add_1[119]\n getitem_564 = _foreach_add_1[120]\n getitem_565 = _foreach_add_1[121]\n getitem_566 = _foreach_add_1[122]\n getitem_567 = _foreach_add_1[123]\n getitem_568 = _foreach_add_1[124]\n getitem_569 = _foreach_add_1[125]\n getitem_570 = _foreach_add_1[126]\n getitem_571 = _foreach_add_1[127]\n getitem_572 = _foreach_add_1[128]\n getitem_573 = _foreach_add_1[129]\n getitem_574 = _foreach_add_1[130]\n getitem_575 = _foreach_add_1[131]\n getitem_576 = _foreach_add_1[132]\n getitem_577 = _foreach_add_1[133]\n getitem_578 = _foreach_add_1[134]\n getitem_579 = _foreach_add_1[135]\n getitem_580 = _foreach_add_1[136]\n getitem_581 = _foreach_add_1[137]\n getitem_582 = _foreach_add_1[138]\n getitem_583 = _foreach_add_1[139]\n getitem_584 = _foreach_add_1[140]\n getitem_585 = _foreach_add_1[141]\n getitem_586 = _foreach_add_1[142]\n getitem_587 = _foreach_add_1[143]\n getitem_588 = _foreach_add_1[144]\n getitem_589 = _foreach_add_1[145]\n getitem_590 = _foreach_add_1[146]\n getitem_591 = _foreach_add_1[147]; _foreach_add_1 = None\n _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar([arg446_1, arg150_1, arg447_1, arg448_1, arg449_1, arg450_1, arg451_1, arg452_1, arg453_1, arg454_1, arg455_1, arg456_1, arg457_1, arg458_1, arg459_1, arg460_1, arg461_1, arg462_1, arg463_1, arg464_1, arg465_1, arg466_1, arg467_1, arg468_1, arg469_1, arg470_1, arg471_1, arg472_1, arg473_1, arg474_1, arg475_1, arg476_1, arg477_1, arg478_1, arg479_1, arg480_1, arg481_1, arg482_1, arg483_1, arg484_1, arg485_1, arg486_1, arg487_1, arg488_1, arg489_1, arg490_1, arg491_1, arg492_1, arg493_1, arg494_1, arg495_1, arg496_1, arg497_1, arg498_1, arg499_1, arg500_1, arg501_1, arg502_1, arg503_1, arg504_1, arg505_1, arg506_1, arg507_1, arg508_1, arg509_1, arg510_1, arg511_1, arg512_1, arg513_1, arg514_1, arg515_1, arg516_1, arg517_1, arg518_1, arg519_1, arg520_1, arg521_1, arg522_1, arg523_1, arg524_1, arg525_1, arg526_1, arg527_1, arg528_1, arg529_1, arg530_1, arg531_1, arg532_1, arg533_1, arg534_1, arg535_1, arg536_1, 
arg537_1, arg538_1, arg539_1, arg540_1, arg541_1, arg542_1, arg543_1, arg544_1, arg545_1, arg546_1, arg547_1, arg548_1, arg549_1, arg550_1, arg551_1, arg552_1, arg553_1, arg554_1, arg555_1, arg556_1, arg557_1, arg558_1, arg559_1, arg560_1, arg561_1, arg562_1, arg563_1, arg564_1, arg565_1, arg566_1, arg567_1, arg568_1, arg569_1, arg570_1, arg571_1, arg572_1, arg573_1, arg574_1, arg575_1, arg576_1, arg577_1, arg578_1, arg579_1, arg580_1, arg581_1, arg582_1, arg583_1, arg584_1, arg585_1, arg586_1, arg587_1, arg588_1, arg589_1, arg590_1, arg591_1, arg592_1], 0.999)\n getitem_592 = _foreach_mul_1[0]\n getitem_593 = _foreach_mul_1[1]\n getitem_594 = _foreach_mul_1[2]\n getitem_595 = _foreach_mul_1[3]\n getitem_596 = _foreach_mul_1[4]\n getitem_597 = _foreach_mul_1[5]\n getitem_598 = _foreach_mul_1[6]\n getitem_599 = _foreach_mul_1[7]\n getitem_600 = _foreach_mul_1[8]\n getitem_601 = _foreach_mul_1[9]\n getitem_602 = _foreach_mul_1[10]\n getitem_603 = _foreach_mul_1[11]\n getitem_604 = _foreach_mul_1[12]\n getitem_605 = _foreach_mul_1[13]\n getitem_606 = _foreach_mul_1[14]\n getitem_607 = _foreach_mul_1[15]\n getitem_608 = _foreach_mul_1[16]\n getitem_609 = _foreach_mul_1[17]\n getitem_610 = _foreach_mul_1[18]\n getitem_611 = _foreach_mul_1[19]\n getitem_612 = _foreach_mul_1[20]\n getitem_613 = _foreach_mul_1[21]\n getitem_614 = _foreach_mul_1[22]\n getitem_615 = _foreach_mul_1[23]\n getitem_616 = _foreach_mul_1[24]\n getitem_617 = _foreach_mul_1[25]\n getitem_618 = _foreach_mul_1[26]\n getitem_619 = _foreach_mul_1[27]\n getitem_620 = _foreach_mul_1[28]\n getitem_621 = _foreach_mul_1[29]\n getitem_622 = _foreach_mul_1[30]\n getitem_623 = _foreach_mul_1[31]\n getitem_624 = _foreach_mul_1[32]\n getitem_625 = _foreach_mul_1[33]\n getitem_626 = _foreach_mul_1[34]\n getitem_627 = _foreach_mul_1[35]\n getitem_628 = _foreach_mul_1[36]\n getitem_629 = _foreach_mul_1[37]\n getitem_630 = _foreach_mul_1[38]\n getitem_631 = _foreach_mul_1[39]\n getitem_632 = _foreach_mul_1[40]\n getitem_633 = _foreach_mul_1[41]\n getitem_634 = _foreach_mul_1[42]\n getitem_635 = _foreach_mul_1[43]\n getitem_636 = _foreach_mul_1[44]\n getitem_637 = _foreach_mul_1[45]\n getitem_638 = _foreach_mul_1[46]\n getitem_639 = _foreach_mul_1[47]\n getitem_640 = _foreach_mul_1[48]\n getitem_641 = _foreach_mul_1[49]\n getitem_642 = _foreach_mul_1[50]\n getitem_643 = _foreach_mul_1[51]\n getitem_644 = _foreach_mul_1[52]\n getitem_645 = _foreach_mul_1[53]\n getitem_646 = _foreach_mul_1[54]\n getitem_647 = _foreach_mul_1[55]\n getitem_648 = _foreach_mul_1[56]\n getitem_649 = _foreach_mul_1[57]\n getitem_650 = _foreach_mul_1[58]\n getitem_651 = _foreach_mul_1[59]\n getitem_652 = _foreach_mul_1[60]\n getitem_653 = _foreach_mul_1[61]\n getitem_654 = _foreach_mul_1[62]\n getitem_655 = _foreach_mul_1[63]\n getitem_656 = _foreach_mul_1[64]\n getitem_657 = _foreach_mul_1[65]\n getitem_658 = _foreach_mul_1[66]\n getitem_659 = _foreach_mul_1[67]\n getitem_660 = _foreach_mul_1[68]\n getitem_661 = _foreach_mul_1[69]\n getitem_662 = _foreach_mul_1[70]\n getitem_663 = _foreach_mul_1[71]\n getitem_664 = _foreach_mul_1[72]\n getitem_665 = _foreach_mul_1[73]\n getitem_666 = _foreach_mul_1[74]\n getitem_667 = _foreach_mul_1[75]\n getitem_668 = _foreach_mul_1[76]\n getitem_669 = _foreach_mul_1[77]\n getitem_670 = _foreach_mul_1[78]\n getitem_671 = _foreach_mul_1[79]\n getitem_672 = _foreach_mul_1[80]\n getitem_673 = _foreach_mul_1[81]\n getitem_674 = _foreach_mul_1[82]\n getitem_675 = _foreach_mul_1[83]\n getitem_676 = _foreach_mul_1[84]\n getitem_677 = 
_foreach_mul_1[85]\n getitem_678 = _foreach_mul_1[86]\n getitem_679 = _foreach_mul_1[87]\n getitem_680 = _foreach_mul_1[88]\n getitem_681 = _foreach_mul_1[89]\n getitem_682 = _foreach_mul_1[90]\n getitem_683 = _foreach_mul_1[91]\n getitem_684 = _foreach_mul_1[92]\n getitem_685 = _foreach_mul_1[93]\n getitem_686 = _foreach_mul_1[94]\n getitem_687 = _foreach_mul_1[95]\n getitem_688 = _foreach_mul_1[96]\n getitem_689 = _foreach_mul_1[97]\n getitem_690 = _foreach_mul_1[98]\n getitem_691 = _foreach_mul_1[99]\n getitem_692 = _foreach_mul_1[100]\n getitem_693 = _foreach_mul_1[101]\n getitem_694 = _foreach_mul_1[102]\n getitem_695 = _foreach_mul_1[103]\n getitem_696 = _foreach_mul_1[104]\n getitem_697 = _foreach_mul_1[105]\n getitem_698 = _foreach_mul_1[106]\n getitem_699 = _foreach_mul_1[107]\n getitem_700 = _foreach_mul_1[108]\n getitem_701 = _foreach_mul_1[109]\n getitem_702 = _foreach_mul_1[110]\n getitem_703 = _foreach_mul_1[111]\n getitem_704 = _foreach_mul_1[112]\n getitem_705 = _foreach_mul_1[113]\n getitem_706 = _foreach_mul_1[114]\n getitem_707 = _foreach_mul_1[115]\n getitem_708 = _foreach_mul_1[116]\n getitem_709 = _foreach_mul_1[117]\n getitem_710 = _foreach_mul_1[118]\n getitem_711 = _foreach_mul_1[119]\n getitem_712 = _foreach_mul_1[120]\n getitem_713 = _foreach_mul_1[121]\n getitem_714 = _foreach_mul_1[122]\n getitem_715 = _foreach_mul_1[123]\n getitem_716 = _foreach_mul_1[124]\n getitem_717 = _foreach_mul_1[125]\n getitem_718 = _foreach_mul_1[126]\n getitem_719 = _foreach_mul_1[127]\n getitem_720 = _foreach_mul_1[128]\n getitem_721 = _foreach_mul_1[129]\n getitem_722 = _foreach_mul_1[130]\n getitem_723 = _foreach_mul_1[131]\n getitem_724 = _foreach_mul_1[132]\n getitem_725 = _foreach_mul_1[133]\n getitem_726 = _foreach_mul_1[134]\n getitem_727 = _foreach_mul_1[135]\n getitem_728 = _foreach_mul_1[136]\n getitem_729 = _foreach_mul_1[137]\n getitem_730 = _foreach_mul_1[138]\n getitem_731 = _foreach_mul_1[139]\n getitem_732 = _foreach_mul_1[140]\n getitem_733 = _foreach_mul_1[141]\n getitem_734 = _foreach_mul_1[142]\n getitem_735 = _foreach_mul_1[143]\n getitem_736 = _foreach_mul_1[144]\n getitem_737 = _foreach_mul_1[145]\n getitem_738 = _foreach_mul_1[146]\n getitem_739 = _foreach_mul_1[147]; _foreach_mul_1 = None\n _foreach_mul_2 = torch.ops.aten._foreach_mul.List([arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, 
arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1], [arg151_1, arg152_1, arg153_1, arg154_1, arg155_1, arg156_1, arg157_1, arg158_1, arg159_1, arg160_1, arg161_1, arg162_1, arg163_1, arg164_1, arg165_1, arg166_1, arg167_1, arg168_1, arg169_1, arg170_1, arg171_1, arg172_1, arg173_1, arg174_1, arg175_1, arg176_1, arg177_1, arg178_1, arg179_1, arg180_1, arg181_1, arg182_1, arg183_1, arg184_1, arg185_1, arg186_1, arg187_1, arg188_1, arg189_1, arg190_1, arg191_1, arg192_1, arg193_1, arg194_1, arg195_1, arg196_1, arg197_1, arg198_1, arg199_1, arg200_1, arg201_1, arg202_1, arg203_1, arg204_1, arg205_1, arg206_1, arg207_1, arg208_1, arg209_1, arg210_1, arg211_1, arg212_1, arg213_1, arg214_1, arg215_1, arg216_1, arg217_1, arg218_1, arg219_1, arg220_1, arg221_1, arg222_1, arg223_1, arg224_1, arg225_1, arg226_1, arg227_1, arg228_1, arg229_1, arg230_1, arg231_1, arg232_1, arg233_1, arg234_1, arg235_1, arg236_1, arg237_1, arg238_1, arg239_1, arg240_1, arg241_1, arg242_1, arg243_1, arg244_1, arg245_1, arg246_1, arg247_1, arg248_1, arg249_1, arg250_1, arg251_1, arg252_1, arg253_1, arg254_1, arg255_1, arg256_1, arg257_1, arg258_1, arg259_1, arg260_1, arg261_1, arg262_1, arg263_1, arg264_1, arg265_1, arg266_1, arg267_1, arg268_1, arg269_1, arg270_1, arg271_1, arg272_1, arg273_1, arg274_1, arg275_1, arg276_1, arg277_1, arg278_1, arg279_1, arg280_1, arg281_1, arg282_1, arg283_1, arg284_1, arg285_1, arg286_1, arg287_1, arg288_1, arg289_1, arg290_1, arg291_1, arg292_1, arg293_1, arg294_1, arg295_1, arg296_1, arg297_1, arg298_1]); arg151_1 = arg152_1 = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = arg159_1 = arg160_1 = arg161_1 = arg162_1 = arg163_1 = arg164_1 = arg165_1 = arg166_1 = arg167_1 = arg168_1 = arg169_1 = arg170_1 = arg171_1 = arg172_1 = arg173_1 = arg174_1 = arg175_1 = arg176_1 = arg177_1 = arg178_1 = arg179_1 = arg180_1 = arg181_1 = arg182_1 = arg183_1 = arg184_1 = arg185_1 = arg186_1 = arg187_1 = arg188_1 = arg189_1 = arg190_1 = arg191_1 = arg192_1 = arg193_1 = arg194_1 = arg195_1 = arg196_1 = arg197_1 = arg198_1 = arg199_1 = arg200_1 = arg201_1 = arg202_1 = arg203_1 = arg204_1 = arg205_1 = arg206_1 = arg207_1 = arg208_1 = arg209_1 = arg210_1 = arg211_1 = arg212_1 = arg213_1 = arg214_1 = arg215_1 = arg216_1 = arg217_1 = arg218_1 = arg219_1 = arg220_1 = arg221_1 = arg222_1 = arg223_1 = arg224_1 = arg225_1 = arg226_1 = arg227_1 = arg228_1 = arg229_1 = arg230_1 = arg231_1 = arg232_1 = arg233_1 = arg234_1 = arg235_1 = arg236_1 = arg237_1 = arg238_1 = arg239_1 = arg240_1 = arg241_1 = arg242_1 = arg243_1 = arg244_1 = arg245_1 = arg246_1 = arg247_1 = arg248_1 = arg249_1 = arg250_1 = arg251_1 = arg252_1 = arg253_1 = arg254_1 = arg255_1 = arg256_1 = arg257_1 = arg258_1 = arg259_1 = arg260_1 = arg261_1 = arg262_1 = arg263_1 = arg264_1 = arg265_1 = arg266_1 = arg267_1 = arg268_1 = arg269_1 = arg270_1 = arg271_1 = arg272_1 = arg273_1 = arg274_1 = arg275_1 = arg276_1 = arg277_1 = arg278_1 = arg279_1 = arg280_1 = arg281_1 = arg282_1 = arg283_1 = arg284_1 = arg285_1 = arg286_1 = arg287_1 = arg288_1 = arg289_1 = arg290_1 = arg291_1 = arg292_1 = arg293_1 = arg294_1 = arg295_1 = arg296_1 = arg297_1 = arg298_1 = None\n getitem_740 = _foreach_mul_2[0]\n getitem_741 = _foreach_mul_2[1]\n getitem_742 = _foreach_mul_2[2]\n getitem_743 = _foreach_mul_2[3]\n getitem_744 = _foreach_mul_2[4]\n getitem_745 = 
_foreach_mul_2[5]\n getitem_746 = _foreach_mul_2[6]\n getitem_747 = _foreach_mul_2[7]\n getitem_748 = _foreach_mul_2[8]\n getitem_749 = _foreach_mul_2[9]\n getitem_750 = _foreach_mul_2[10]\n getitem_751 = _foreach_mul_2[11]\n getitem_752 = _foreach_mul_2[12]\n getitem_753 = _foreach_mul_2[13]\n getitem_754 = _foreach_mul_2[14]\n getitem_755 = _foreach_mul_2[15]\n getitem_756 = _foreach_mul_2[16]\n getitem_757 = _foreach_mul_2[17]\n getitem_758 = _foreach_mul_2[18]\n getitem_759 = _foreach_mul_2[19]\n getitem_760 = _foreach_mul_2[20]\n getitem_761 = _foreach_mul_2[21]\n getitem_762 = _foreach_mul_2[22]\n getitem_763 = _foreach_mul_2[23]\n getitem_764 = _foreach_mul_2[24]\n getitem_765 = _foreach_mul_2[25]\n getitem_766 = _foreach_mul_2[26]\n getitem_767 = _foreach_mul_2[27]\n getitem_768 = _foreach_mul_2[28]\n getitem_769 = _foreach_mul_2[29]\n getitem_770 = _foreach_mul_2[30]\n getitem_771 = _foreach_mul_2[31]\n getitem_772 = _foreach_mul_2[32]\n getitem_773 = _foreach_mul_2[33]\n getitem_774 = _foreach_mul_2[34]\n getitem_775 = _foreach_mul_2[35]\n getitem_776 = _foreach_mul_2[36]\n getitem_777 = _foreach_mul_2[37]\n getitem_778 = _foreach_mul_2[38]\n getitem_779 = _foreach_mul_2[39]\n getitem_780 = _foreach_mul_2[40]\n getitem_781 = _foreach_mul_2[41]\n getitem_782 = _foreach_mul_2[42]\n getitem_783 = _foreach_mul_2[43]\n getitem_784 = _foreach_mul_2[44]\n getitem_785 = _foreach_mul_2[45]\n getitem_786 = _foreach_mul_2[46]\n getitem_787 = _foreach_mul_2[47]\n getitem_788 = _foreach_mul_2[48]\n getitem_789 = _foreach_mul_2[49]\n getitem_790 = _foreach_mul_2[50]\n getitem_791 = _foreach_mul_2[51]\n getitem_792 = _foreach_mul_2[52]\n getitem_793 = _foreach_mul_2[53]\n getitem_794 = _foreach_mul_2[54]\n getitem_795 = _foreach_mul_2[55]\n getitem_796 = _foreach_mul_2[56]\n getitem_797 = _foreach_mul_2[57]\n getitem_798 = _foreach_mul_2[58]\n getitem_799 = _foreach_mul_2[59]\n getitem_800 = _foreach_mul_2[60]\n getitem_801 = _foreach_mul_2[61]\n getitem_802 = _foreach_mul_2[62]\n getitem_803 = _foreach_mul_2[63]\n getitem_804 = _foreach_mul_2[64]\n getitem_805 = _foreach_mul_2[65]\n getitem_806 = _foreach_mul_2[66]\n getitem_807 = _foreach_mul_2[67]\n getitem_808 = _foreach_mul_2[68]\n getitem_809 = _foreach_mul_2[69]\n getitem_810 = _foreach_mul_2[70]\n getitem_811 = _foreach_mul_2[71]\n getitem_812 = _foreach_mul_2[72]\n getitem_813 = _foreach_mul_2[73]\n getitem_814 = _foreach_mul_2[74]\n getitem_815 = _foreach_mul_2[75]\n getitem_816 = _foreach_mul_2[76]\n getitem_817 = _foreach_mul_2[77]\n getitem_818 = _foreach_mul_2[78]\n getitem_819 = _foreach_mul_2[79]\n getitem_820 = _foreach_mul_2[80]\n getitem_821 = _foreach_mul_2[81]\n getitem_822 = _foreach_mul_2[82]\n getitem_823 = _foreach_mul_2[83]\n getitem_824 = _foreach_mul_2[84]\n getitem_825 = _foreach_mul_2[85]\n getitem_826 = _foreach_mul_2[86]\n getitem_827 = _foreach_mul_2[87]\n getitem_828 = _foreach_mul_2[88]\n getitem_829 = _foreach_mul_2[89]\n getitem_830 = _foreach_mul_2[90]\n getitem_831 = _foreach_mul_2[91]\n getitem_832 = _foreach_mul_2[92]\n getitem_833 = _foreach_mul_2[93]\n getitem_834 = _foreach_mul_2[94]\n getitem_835 = _foreach_mul_2[95]\n getitem_836 = _foreach_mul_2[96]\n getitem_837 = _foreach_mul_2[97]\n getitem_838 = _foreach_mul_2[98]\n getitem_839 = _foreach_mul_2[99]\n getitem_840 = _foreach_mul_2[100]\n getitem_841 = _foreach_mul_2[101]\n getitem_842 = _foreach_mul_2[102]\n getitem_843 = _foreach_mul_2[103]\n getitem_844 = _foreach_mul_2[104]\n getitem_845 = _foreach_mul_2[105]\n getitem_846 = 
_foreach_mul_2[106]\n getitem_847 = _foreach_mul_2[107]\n getitem_848 = _foreach_mul_2[108]\n getitem_849 = _foreach_mul_2[109]\n getitem_850 = _foreach_mul_2[110]\n getitem_851 = _foreach_mul_2[111]\n getitem_852 = _foreach_mul_2[112]\n getitem_853 = _foreach_mul_2[113]\n getitem_854 = _foreach_mul_2[114]\n getitem_855 = _foreach_mul_2[115]\n getitem_856 = _foreach_mul_2[116]\n getitem_857 = _foreach_mul_2[117]\n getitem_858 = _foreach_mul_2[118]\n getitem_859 = _foreach_mul_2[119]\n getitem_860 = _foreach_mul_2[120]\n getitem_861 = _foreach_mul_2[121]\n getitem_862 = _foreach_mul_2[122]\n getitem_863 = _foreach_mul_2[123]\n getitem_864 = _foreach_mul_2[124]\n getitem_865 = _foreach_mul_2[125]\n getitem_866 = _foreach_mul_2[126]\n getitem_867 = _foreach_mul_2[127]\n getitem_868 = _foreach_mul_2[128]\n getitem_869 = _foreach_mul_2[129]\n getitem_870 = _foreach_mul_2[130]\n getitem_871 = _foreach_mul_2[131]\n getitem_872 = _foreach_mul_2[132]\n getitem_873 = _foreach_mul_2[133]\n getitem_874 = _foreach_mul_2[134]\n getitem_875 = _foreach_mul_2[135]\n getitem_876 = _foreach_mul_2[136]\n getitem_877 = _foreach_mul_2[137]\n getitem_878 = _foreach_mul_2[138]\n getitem_879 = _foreach_mul_2[139]\n getitem_880 = _foreach_mul_2[140]\n getitem_881 = _foreach_mul_2[141]\n getitem_882 = _foreach_mul_2[142]\n getitem_883 = _foreach_mul_2[143]\n getitem_884 = _foreach_mul_2[144]\n getitem_885 = _foreach_mul_2[145]\n getitem_886 = _foreach_mul_2[146]\n getitem_887 = _foreach_mul_2[147]; _foreach_mul_2 = None\n _foreach_add_2 = torch.ops.aten._foreach_add.List([getitem_592, getitem_593, getitem_594, getitem_595, getitem_596, getitem_597, getitem_598, getitem_599, getitem_600, getitem_601, getitem_602, getitem_603, getitem_604, getitem_605, getitem_606, getitem_607, getitem_608, getitem_609, getitem_610, getitem_611, getitem_612, getitem_613, getitem_614, getitem_615, getitem_616, getitem_617, getitem_618, getitem_619, getitem_620, getitem_621, getitem_622, getitem_623, getitem_624, getitem_625, getitem_626, getitem_627, getitem_628, getitem_629, getitem_630, getitem_631, getitem_632, getitem_633, getitem_634, getitem_635, getitem_636, getitem_637, getitem_638, getitem_639, getitem_640, getitem_641, getitem_642, getitem_643, getitem_644, getitem_645, getitem_646, getitem_647, getitem_648, getitem_649, getitem_650, getitem_651, getitem_652, getitem_653, getitem_654, getitem_655, getitem_656, getitem_657, getitem_658, getitem_659, getitem_660, getitem_661, getitem_662, getitem_663, getitem_664, getitem_665, getitem_666, getitem_667, getitem_668, getitem_669, getitem_670, getitem_671, getitem_672, getitem_673, getitem_674, getitem_675, getitem_676, getitem_677, getitem_678, getitem_679, getitem_680, getitem_681, getitem_682, getitem_683, getitem_684, getitem_685, getitem_686, getitem_687, getitem_688, getitem_689, getitem_690, getitem_691, getitem_692, getitem_693, getitem_694, getitem_695, getitem_696, getitem_697, getitem_698, getitem_699, getitem_700, getitem_701, getitem_702, getitem_703, getitem_704, getitem_705, getitem_706, getitem_707, getitem_708, getitem_709, getitem_710, getitem_711, getitem_712, getitem_713, getitem_714, getitem_715, getitem_716, getitem_717, getitem_718, getitem_719, getitem_720, getitem_721, getitem_722, getitem_723, getitem_724, getitem_725, getitem_726, getitem_727, getitem_728, getitem_729, getitem_730, getitem_731, getitem_732, getitem_733, getitem_734, getitem_735, getitem_736, getitem_737, getitem_738, getitem_739], [getitem_740, getitem_741, getitem_742, getitem_743, 
getitem_744, getitem_745, getitem_746, getitem_747, getitem_748, getitem_749, getitem_750, getitem_751, getitem_752, getitem_753, getitem_754, getitem_755, getitem_756, getitem_757, getitem_758, getitem_759, getitem_760, getitem_761, getitem_762, getitem_763, getitem_764, getitem_765, getitem_766, getitem_767, getitem_768, getitem_769, getitem_770, getitem_771, getitem_772, getitem_773, getitem_774, getitem_775, getitem_776, getitem_777, getitem_778, getitem_779, getitem_780, getitem_781, getitem_782, getitem_783, getitem_784, getitem_785, getitem_786, getitem_787, getitem_788, getitem_789, getitem_790, getitem_791, getitem_792, getitem_793, getitem_794, getitem_795, getitem_796, getitem_797, getitem_798, getitem_799, getitem_800, getitem_801, getitem_802, getitem_803, getitem_804, getitem_805, getitem_806, getitem_807, getitem_808, getitem_809, getitem_810, getitem_811, getitem_812, getitem_813, getitem_814, getitem_815, getitem_816, getitem_817, getitem_818, getitem_819, getitem_820, getitem_821, getitem_822, getitem_823, getitem_824, getitem_825, getitem_826, getitem_827, getitem_828, getitem_829, getitem_830, getitem_831, getitem_832, getitem_833, getitem_834, getitem_835, getitem_836, getitem_837, getitem_838, getitem_839, getitem_840, getitem_841, getitem_842, getitem_843, getitem_844, getitem_845, getitem_846, getitem_847, getitem_848, getitem_849, getitem_850, getitem_851, getitem_852, getitem_853, getitem_854, getitem_855, getitem_856, getitem_857, getitem_858, getitem_859, getitem_860, getitem_861, getitem_862, getitem_863, getitem_864, getitem_865, getitem_866, getitem_867, getitem_868, getitem_869, getitem_870, getitem_871, getitem_872, getitem_873, getitem_874, getitem_875, getitem_876, getitem_877, getitem_878, getitem_879, getitem_880, getitem_881, getitem_882, getitem_883, getitem_884, getitem_885, getitem_886, getitem_887], alpha = 0.0010000000000000009); getitem_592 = getitem_593 = getitem_594 = getitem_595 = getitem_596 = getitem_597 = getitem_598 = getitem_599 = getitem_600 = getitem_601 = getitem_602 = getitem_603 = getitem_604 = getitem_605 = getitem_606 = getitem_607 = getitem_608 = getitem_609 = getitem_610 = getitem_611 = getitem_612 = getitem_613 = getitem_614 = getitem_615 = getitem_616 = getitem_617 = getitem_618 = getitem_619 = getitem_620 = getitem_621 = getitem_622 = getitem_623 = getitem_624 = getitem_625 = getitem_626 = getitem_627 = getitem_628 = getitem_629 = getitem_630 = getitem_631 = getitem_632 = getitem_633 = getitem_634 = getitem_635 = getitem_636 = getitem_637 = getitem_638 = getitem_639 = getitem_640 = getitem_641 = getitem_642 = getitem_643 = getitem_644 = getitem_645 = getitem_646 = getitem_647 = getitem_648 = getitem_649 = getitem_650 = getitem_651 = getitem_652 = getitem_653 = getitem_654 = getitem_655 = getitem_656 = getitem_657 = getitem_658 = getitem_659 = getitem_660 = getitem_661 = getitem_662 = getitem_663 = getitem_664 = getitem_665 = getitem_666 = getitem_667 = getitem_668 = getitem_669 = getitem_670 = getitem_671 = getitem_672 = getitem_673 = getitem_674 = getitem_675 = getitem_676 = getitem_677 = getitem_678 = getitem_679 = getitem_680 = getitem_681 = getitem_682 = getitem_683 = getitem_684 = getitem_685 = getitem_686 = getitem_687 = getitem_688 = getitem_689 = getitem_690 = getitem_691 = getitem_692 = getitem_693 = getitem_694 = getitem_695 = getitem_696 = getitem_697 = getitem_698 = getitem_699 = getitem_700 = getitem_701 = getitem_702 = getitem_703 = getitem_704 = getitem_705 = getitem_706 = getitem_707 = getitem_708 = getitem_709 
= getitem_710 = getitem_711 = getitem_712 = getitem_713 = getitem_714 = getitem_715 = getitem_716 = getitem_717 = getitem_718 = getitem_719 = getitem_720 = getitem_721 = getitem_722 = getitem_723 = getitem_724 = getitem_725 = getitem_726 = getitem_727 = getitem_728 = getitem_729 = getitem_730 = getitem_731 = getitem_732 = getitem_733 = getitem_734 = getitem_735 = getitem_736 = getitem_737 = getitem_738 = getitem_739 = getitem_740 = getitem_741 = getitem_742 = getitem_743 = getitem_744 = getitem_745 = getitem_746 = getitem_747 = getitem_748 = getitem_749 = getitem_750 = getitem_751 = getitem_752 = getitem_753 = getitem_754 = getitem_755 = getitem_756 = getitem_757 = getitem_758 = getitem_759 = getitem_760 = getitem_761 = getitem_762 = getitem_763 = getitem_764 = getitem_765 = getitem_766 = getitem_767 = getitem_768 = getitem_769 = getitem_770 = getitem_771 = getitem_772 = getitem_773 = getitem_774 = getitem_775 = getitem_776 = getitem_777 = getitem_778 = getitem_779 = getitem_780 = getitem_781 = getitem_782 = getitem_783 = getitem_784 = getitem_785 = getitem_786 = getitem_787 = getitem_788 = getitem_789 = getitem_790 = getitem_791 = getitem_792 = getitem_793 = getitem_794 = getitem_795 = getitem_796 = getitem_797 = getitem_798 = getitem_799 = getitem_800 = getitem_801 = getitem_802 = getitem_803 = getitem_804 = getitem_805 = getitem_806 = getitem_807 = getitem_808 = getitem_809 = getitem_810 = getitem_811 = getitem_812 = getitem_813 = getitem_814 = getitem_815 = getitem_816 = getitem_817 = getitem_818 = getitem_819 = getitem_820 = getitem_821 = getitem_822 = getitem_823 = getitem_824 = getitem_825 = getitem_826 = getitem_827 = getitem_828 = getitem_829 = getitem_830 = getitem_831 = getitem_832 = getitem_833 = getitem_834 = getitem_835 = getitem_836 = getitem_837 = getitem_838 = getitem_839 = getitem_840 = getitem_841 = getitem_842 = getitem_843 = getitem_844 = getitem_845 = getitem_846 = getitem_847 = getitem_848 = getitem_849 = getitem_850 = getitem_851 = getitem_852 = getitem_853 = getitem_854 = getitem_855 = getitem_856 = getitem_857 = getitem_858 = getitem_859 = getitem_860 = getitem_861 = getitem_862 = getitem_863 = getitem_864 = getitem_865 = getitem_866 = getitem_867 = getitem_868 = getitem_869 = getitem_870 = getitem_871 = getitem_872 = getitem_873 = getitem_874 = getitem_875 = getitem_876 = getitem_877 = getitem_878 = getitem_879 = getitem_880 = getitem_881 = getitem_882 = getitem_883 = getitem_884 = getitem_885 = getitem_886 = getitem_887 = None\n getitem_888 = _foreach_add_2[0]\n getitem_889 = _foreach_add_2[1]\n getitem_890 = _foreach_add_2[2]\n getitem_891 = _foreach_add_2[3]\n getitem_892 = _foreach_add_2[4]\n getitem_893 = _foreach_add_2[5]\n getitem_894 = _foreach_add_2[6]\n getitem_895 = _foreach_add_2[7]\n getitem_896 = _foreach_add_2[8]\n getitem_897 = _foreach_add_2[9]\n getitem_898 = _foreach_add_2[10]\n getitem_899 = _foreach_add_2[11]\n getitem_900 = _foreach_add_2[12]\n getitem_901 = _foreach_add_2[13]\n getitem_902 = _foreach_add_2[14]\n getitem_903 = _foreach_add_2[15]\n getitem_904 = _foreach_add_2[16]\n getitem_905 = _foreach_add_2[17]\n getitem_906 = _foreach_add_2[18]\n getitem_907 = _foreach_add_2[19]\n getitem_908 = _foreach_add_2[20]\n getitem_909 = _foreach_add_2[21]\n getitem_910 = _foreach_add_2[22]\n getitem_911 = _foreach_add_2[23]\n getitem_912 = _foreach_add_2[24]\n getitem_913 = _foreach_add_2[25]\n getitem_914 = _foreach_add_2[26]\n getitem_915 = _foreach_add_2[27]\n getitem_916 = _foreach_add_2[28]\n getitem_917 = _foreach_add_2[29]\n getitem_918 = 
_foreach_add_2[30]\n getitem_919 = _foreach_add_2[31]\n getitem_920 = _foreach_add_2[32]\n getitem_921 = _foreach_add_2[33]\n getitem_922 = _foreach_add_2[34]\n getitem_923 = _foreach_add_2[35]\n getitem_924 = _foreach_add_2[36]\n getitem_925 = _foreach_add_2[37]\n getitem_926 = _foreach_add_2[38]\n getitem_927 = _foreach_add_2[39]\n getitem_928 = _foreach_add_2[40]\n getitem_929 = _foreach_add_2[41]\n getitem_930 = _foreach_add_2[42]\n getitem_931 = _foreach_add_2[43]\n getitem_932 = _foreach_add_2[44]\n getitem_933 = _foreach_add_2[45]\n getitem_934 = _foreach_add_2[46]\n getitem_935 = _foreach_add_2[47]\n getitem_936 = _foreach_add_2[48]\n getitem_937 = _foreach_add_2[49]\n getitem_938 = _foreach_add_2[50]\n getitem_939 = _foreach_add_2[51]\n getitem_940 = _foreach_add_2[52]\n getitem_941 = _foreach_add_2[53]\n getitem_942 = _foreach_add_2[54]\n getitem_943 = _foreach_add_2[55]\n getitem_944 = _foreach_add_2[56]\n getitem_945 = _foreach_add_2[57]\n getitem_946 = _foreach_add_2[58]\n getitem_947 = _foreach_add_2[59]\n getitem_948 = _foreach_add_2[60]\n getitem_949 = _foreach_add_2[61]\n getitem_950 = _foreach_add_2[62]\n getitem_951 = _foreach_add_2[63]\n getitem_952 = _foreach_add_2[64]\n getitem_953 = _foreach_add_2[65]\n getitem_954 = _foreach_add_2[66]\n getitem_955 = _foreach_add_2[67]\n getitem_956 = _foreach_add_2[68]\n getitem_957 = _foreach_add_2[69]\n getitem_958 = _foreach_add_2[70]\n getitem_959 = _foreach_add_2[71]\n getitem_960 = _foreach_add_2[72]\n getitem_961 = _foreach_add_2[73]\n getitem_962 = _foreach_add_2[74]\n getitem_963 = _foreach_add_2[75]\n getitem_964 = _foreach_add_2[76]\n getitem_965 = _foreach_add_2[77]\n getitem_966 = _foreach_add_2[78]\n getitem_967 = _foreach_add_2[79]\n getitem_968 = _foreach_add_2[80]\n getitem_969 = _foreach_add_2[81]\n getitem_970 = _foreach_add_2[82]\n getitem_971 = _foreach_add_2[83]\n getitem_972 = _foreach_add_2[84]\n getitem_973 = _foreach_add_2[85]\n getitem_974 = _foreach_add_2[86]\n getitem_975 = _foreach_add_2[87]\n getitem_976 = _foreach_add_2[88]\n getitem_977 = _foreach_add_2[89]\n getitem_978 = _foreach_add_2[90]\n getitem_979 = _foreach_add_2[91]\n getitem_980 = _foreach_add_2[92]\n getitem_981 = _foreach_add_2[93]\n getitem_982 = _foreach_add_2[94]\n getitem_983 = _foreach_add_2[95]\n getitem_984 = _foreach_add_2[96]\n getitem_985 = _foreach_add_2[97]\n getitem_986 = _foreach_add_2[98]\n getitem_987 = _foreach_add_2[99]\n getitem_988 = _foreach_add_2[100]\n getitem_989 = _foreach_add_2[101]\n getitem_990 = _foreach_add_2[102]\n getitem_991 = _foreach_add_2[103]\n getitem_992 = _foreach_add_2[104]\n getitem_993 = _foreach_add_2[105]\n getitem_994 = _foreach_add_2[106]\n getitem_995 = _foreach_add_2[107]\n getitem_996 = _foreach_add_2[108]\n getitem_997 = _foreach_add_2[109]\n getitem_998 = _foreach_add_2[110]\n getitem_999 = _foreach_add_2[111]\n getitem_1000 = _foreach_add_2[112]\n getitem_1001 = _foreach_add_2[113]\n getitem_1002 = _foreach_add_2[114]\n getitem_1003 = _foreach_add_2[115]\n getitem_1004 = _foreach_add_2[116]\n getitem_1005 = _foreach_add_2[117]\n getitem_1006 = _foreach_add_2[118]\n getitem_1007 = _foreach_add_2[119]\n getitem_1008 = _foreach_add_2[120]\n getitem_1009 = _foreach_add_2[121]\n getitem_1010 = _foreach_add_2[122]\n getitem_1011 = _foreach_add_2[123]\n getitem_1012 = _foreach_add_2[124]\n getitem_1013 = _foreach_add_2[125]\n getitem_1014 = _foreach_add_2[126]\n getitem_1015 = _foreach_add_2[127]\n getitem_1016 = _foreach_add_2[128]\n getitem_1017 = _foreach_add_2[129]\n getitem_1018 = 
_foreach_add_2[130]\n getitem_1019 = _foreach_add_2[131]\n getitem_1020 = _foreach_add_2[132]\n getitem_1021 = _foreach_add_2[133]\n getitem_1022 = _foreach_add_2[134]\n getitem_1023 = _foreach_add_2[135]\n getitem_1024 = _foreach_add_2[136]\n getitem_1025 = _foreach_add_2[137]\n getitem_1026 = _foreach_add_2[138]\n getitem_1027 = _foreach_add_2[139]\n getitem_1028 = _foreach_add_2[140]\n getitem_1029 = _foreach_add_2[141]\n getitem_1030 = _foreach_add_2[142]\n getitem_1031 = _foreach_add_2[143]\n getitem_1032 = _foreach_add_2[144]\n getitem_1033 = _foreach_add_2[145]\n getitem_1034 = _foreach_add_2[146]\n getitem_1035 = _foreach_add_2[147]; _foreach_add_2 = None\n _foreach_pow = torch.ops.aten._foreach_pow.ScalarAndTensor(0.9, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1036 = _foreach_pow[0]\n getitem_1037 = _foreach_pow[1]\n getitem_1038 = _foreach_pow[2]\n getitem_1039 = _foreach_pow[3]\n getitem_1040 = _foreach_pow[4]\n getitem_1041 = _foreach_pow[5]\n getitem_1042 = _foreach_pow[6]\n getitem_1043 = _foreach_pow[7]\n getitem_1044 = _foreach_pow[8]\n getitem_1045 = _foreach_pow[9]\n getitem_1046 = _foreach_pow[10]\n getitem_1047 = _foreach_pow[11]\n getitem_1048 = _foreach_pow[12]\n getitem_1049 = _foreach_pow[13]\n getitem_1050 = _foreach_pow[14]\n getitem_1051 = _foreach_pow[15]\n getitem_1052 = _foreach_pow[16]\n getitem_1053 = _foreach_pow[17]\n getitem_1054 = _foreach_pow[18]\n getitem_1055 = _foreach_pow[19]\n getitem_1056 = _foreach_pow[20]\n getitem_1057 = _foreach_pow[21]\n getitem_1058 = _foreach_pow[22]\n getitem_1059 = _foreach_pow[23]\n getitem_1060 = _foreach_pow[24]\n getitem_1061 = _foreach_pow[25]\n getitem_1062 = _foreach_pow[26]\n getitem_1063 = _foreach_pow[27]\n getitem_1064 = _foreach_pow[28]\n getitem_1065 = 
_foreach_pow[29]\n getitem_1066 = _foreach_pow[30]\n getitem_1067 = _foreach_pow[31]\n getitem_1068 = _foreach_pow[32]\n getitem_1069 = _foreach_pow[33]\n getitem_1070 = _foreach_pow[34]\n getitem_1071 = _foreach_pow[35]\n getitem_1072 = _foreach_pow[36]\n getitem_1073 = _foreach_pow[37]\n getitem_1074 = _foreach_pow[38]\n getitem_1075 = _foreach_pow[39]\n getitem_1076 = _foreach_pow[40]\n getitem_1077 = _foreach_pow[41]\n getitem_1078 = _foreach_pow[42]\n getitem_1079 = _foreach_pow[43]\n getitem_1080 = _foreach_pow[44]\n getitem_1081 = _foreach_pow[45]\n getitem_1082 = _foreach_pow[46]\n getitem_1083 = _foreach_pow[47]\n getitem_1084 = _foreach_pow[48]\n getitem_1085 = _foreach_pow[49]\n getitem_1086 = _foreach_pow[50]\n getitem_1087 = _foreach_pow[51]\n getitem_1088 = _foreach_pow[52]\n getitem_1089 = _foreach_pow[53]\n getitem_1090 = _foreach_pow[54]\n getitem_1091 = _foreach_pow[55]\n getitem_1092 = _foreach_pow[56]\n getitem_1093 = _foreach_pow[57]\n getitem_1094 = _foreach_pow[58]\n getitem_1095 = _foreach_pow[59]\n getitem_1096 = _foreach_pow[60]\n getitem_1097 = _foreach_pow[61]\n getitem_1098 = _foreach_pow[62]\n getitem_1099 = _foreach_pow[63]\n getitem_1100 = _foreach_pow[64]\n getitem_1101 = _foreach_pow[65]\n getitem_1102 = _foreach_pow[66]\n getitem_1103 = _foreach_pow[67]\n getitem_1104 = _foreach_pow[68]\n getitem_1105 = _foreach_pow[69]\n getitem_1106 = _foreach_pow[70]\n getitem_1107 = _foreach_pow[71]\n getitem_1108 = _foreach_pow[72]\n getitem_1109 = _foreach_pow[73]\n getitem_1110 = _foreach_pow[74]\n getitem_1111 = _foreach_pow[75]\n getitem_1112 = _foreach_pow[76]\n getitem_1113 = _foreach_pow[77]\n getitem_1114 = _foreach_pow[78]\n getitem_1115 = _foreach_pow[79]\n getitem_1116 = _foreach_pow[80]\n getitem_1117 = _foreach_pow[81]\n getitem_1118 = _foreach_pow[82]\n getitem_1119 = _foreach_pow[83]\n getitem_1120 = _foreach_pow[84]\n getitem_1121 = _foreach_pow[85]\n getitem_1122 = _foreach_pow[86]\n getitem_1123 = _foreach_pow[87]\n getitem_1124 = _foreach_pow[88]\n getitem_1125 = _foreach_pow[89]\n getitem_1126 = _foreach_pow[90]\n getitem_1127 = _foreach_pow[91]\n getitem_1128 = _foreach_pow[92]\n getitem_1129 = _foreach_pow[93]\n getitem_1130 = _foreach_pow[94]\n getitem_1131 = _foreach_pow[95]\n getitem_1132 = _foreach_pow[96]\n getitem_1133 = _foreach_pow[97]\n getitem_1134 = _foreach_pow[98]\n getitem_1135 = _foreach_pow[99]\n getitem_1136 = _foreach_pow[100]\n getitem_1137 = _foreach_pow[101]\n getitem_1138 = _foreach_pow[102]\n getitem_1139 = _foreach_pow[103]\n getitem_1140 = _foreach_pow[104]\n getitem_1141 = _foreach_pow[105]\n getitem_1142 = _foreach_pow[106]\n getitem_1143 = _foreach_pow[107]\n getitem_1144 = _foreach_pow[108]\n getitem_1145 = _foreach_pow[109]\n getitem_1146 = _foreach_pow[110]\n getitem_1147 = _foreach_pow[111]\n getitem_1148 = _foreach_pow[112]\n getitem_1149 = _foreach_pow[113]\n getitem_1150 = _foreach_pow[114]\n getitem_1151 = _foreach_pow[115]\n getitem_1152 = _foreach_pow[116]\n getitem_1153 = _foreach_pow[117]\n getitem_1154 = _foreach_pow[118]\n getitem_1155 = _foreach_pow[119]\n getitem_1156 = _foreach_pow[120]\n getitem_1157 = _foreach_pow[121]\n getitem_1158 = _foreach_pow[122]\n getitem_1159 = _foreach_pow[123]\n getitem_1160 = _foreach_pow[124]\n getitem_1161 = _foreach_pow[125]\n getitem_1162 = _foreach_pow[126]\n getitem_1163 = _foreach_pow[127]\n getitem_1164 = _foreach_pow[128]\n getitem_1165 = _foreach_pow[129]\n getitem_1166 = _foreach_pow[130]\n getitem_1167 = _foreach_pow[131]\n getitem_1168 = _foreach_pow[132]\n 
getitem_1169 = _foreach_pow[133]\n getitem_1170 = _foreach_pow[134]\n getitem_1171 = _foreach_pow[135]\n getitem_1172 = _foreach_pow[136]\n getitem_1173 = _foreach_pow[137]\n getitem_1174 = _foreach_pow[138]\n getitem_1175 = _foreach_pow[139]\n getitem_1176 = _foreach_pow[140]\n getitem_1177 = _foreach_pow[141]\n getitem_1178 = _foreach_pow[142]\n getitem_1179 = _foreach_pow[143]\n getitem_1180 = _foreach_pow[144]\n getitem_1181 = _foreach_pow[145]\n getitem_1182 = _foreach_pow[146]\n getitem_1183 = _foreach_pow[147]; _foreach_pow = None\n _foreach_pow_1 = torch.ops.aten._foreach_pow.ScalarAndTensor(0.999, [getitem, getitem_1, getitem_2, getitem_3, getitem_4, getitem_5, getitem_6, getitem_7, getitem_8, getitem_9, getitem_10, getitem_11, getitem_12, getitem_13, getitem_14, getitem_15, getitem_16, getitem_17, getitem_18, getitem_19, getitem_20, getitem_21, getitem_22, getitem_23, getitem_24, getitem_25, getitem_26, getitem_27, getitem_28, getitem_29, getitem_30, getitem_31, getitem_32, getitem_33, getitem_34, getitem_35, getitem_36, getitem_37, getitem_38, getitem_39, getitem_40, getitem_41, getitem_42, getitem_43, getitem_44, getitem_45, getitem_46, getitem_47, getitem_48, getitem_49, getitem_50, getitem_51, getitem_52, getitem_53, getitem_54, getitem_55, getitem_56, getitem_57, getitem_58, getitem_59, getitem_60, getitem_61, getitem_62, getitem_63, getitem_64, getitem_65, getitem_66, getitem_67, getitem_68, getitem_69, getitem_70, getitem_71, getitem_72, getitem_73, getitem_74, getitem_75, getitem_76, getitem_77, getitem_78, getitem_79, getitem_80, getitem_81, getitem_82, getitem_83, getitem_84, getitem_85, getitem_86, getitem_87, getitem_88, getitem_89, getitem_90, getitem_91, getitem_92, getitem_93, getitem_94, getitem_95, getitem_96, getitem_97, getitem_98, getitem_99, getitem_100, getitem_101, getitem_102, getitem_103, getitem_104, getitem_105, getitem_106, getitem_107, getitem_108, getitem_109, getitem_110, getitem_111, getitem_112, getitem_113, getitem_114, getitem_115, getitem_116, getitem_117, getitem_118, getitem_119, getitem_120, getitem_121, getitem_122, getitem_123, getitem_124, getitem_125, getitem_126, getitem_127, getitem_128, getitem_129, getitem_130, getitem_131, getitem_132, getitem_133, getitem_134, getitem_135, getitem_136, getitem_137, getitem_138, getitem_139, getitem_140, getitem_141, getitem_142, getitem_143, getitem_144, getitem_145, getitem_146, getitem_147])\n getitem_1184 = _foreach_pow_1[0]\n getitem_1185 = _foreach_pow_1[1]\n getitem_1186 = _foreach_pow_1[2]\n getitem_1187 = _foreach_pow_1[3]\n getitem_1188 = _foreach_pow_1[4]\n getitem_1189 = _foreach_pow_1[5]\n getitem_1190 = _foreach_pow_1[6]\n getitem_1191 = _foreach_pow_1[7]\n getitem_1192 = _foreach_pow_1[8]\n getitem_1193 = _foreach_pow_1[9]\n getitem_1194 = _foreach_pow_1[10]\n getitem_1195 = _foreach_pow_1[11]\n getitem_1196 = _foreach_pow_1[12]\n getitem_1197 = _foreach_pow_1[13]\n getitem_1198 = _foreach_pow_1[14]\n getitem_1199 = _foreach_pow_1[15]\n getitem_1200 = _foreach_pow_1[16]\n getitem_1201 = _foreach_pow_1[17]\n getitem_1202 = _foreach_pow_1[18]\n getitem_1203 = _foreach_pow_1[19]\n getitem_1204 = _foreach_pow_1[20]\n getitem_1205 = _foreach_pow_1[21]\n getitem_1206 = _foreach_pow_1[22]\n getitem_1207 = _foreach_pow_1[23]\n getitem_1208 = _foreach_pow_1[24]\n getitem_1209 = _foreach_pow_1[25]\n getitem_1210 = _foreach_pow_1[26]\n getitem_1211 = _foreach_pow_1[27]\n getitem_1212 = _foreach_pow_1[28]\n getitem_1213 = _foreach_pow_1[29]\n getitem_1214 = _foreach_pow_1[30]\n getitem_1215 = 
_foreach_pow_1[31]\n getitem_1216 = _foreach_pow_1[32]\n getitem_1217 = _foreach_pow_1[33]\n getitem_1218 = _foreach_pow_1[34]\n getitem_1219 = _foreach_pow_1[35]\n getitem_1220 = _foreach_pow_1[36]\n getitem_1221 = _foreach_pow_1[37]\n getitem_1222 = _foreach_pow_1[38]\n getitem_1223 = _foreach_pow_1[39]\n getitem_1224 = _foreach_pow_1[40]\n getitem_1225 = _foreach_pow_1[41]\n getitem_1226 = _foreach_pow_1[42]\n getitem_1227 = _foreach_pow_1[43]\n getitem_1228 = _foreach_pow_1[44]\n getitem_1229 = _foreach_pow_1[45]\n getitem_1230 = _foreach_pow_1[46]\n getitem_1231 = _foreach_pow_1[47]\n getitem_1232 = _foreach_pow_1[48]\n getitem_1233 = _foreach_pow_1[49]\n getitem_1234 = _foreach_pow_1[50]\n getitem_1235 = _foreach_pow_1[51]\n getitem_1236 = _foreach_pow_1[52]\n getitem_1237 = _foreach_pow_1[53]\n getitem_1238 = _foreach_pow_1[54]\n getitem_1239 = _foreach_pow_1[55]\n getitem_1240 = _foreach_pow_1[56]\n getitem_1241 = _foreach_pow_1[57]\n getitem_1242 = _foreach_pow_1[58]\n getitem_1243 = _foreach_pow_1[59]\n getitem_1244 = _foreach_pow_1[60]\n getitem_1245 = _foreach_pow_1[61]\n getitem_1246 = _foreach_pow_1[62]\n getitem_1247 = _foreach_pow_1[63]\n getitem_1248 = _foreach_pow_1[64]\n getitem_1249 = _foreach_pow_1[65]\n getitem_1250 = _foreach_pow_1[66]\n getitem_1251 = _foreach_pow_1[67]\n getitem_1252 = _foreach_pow_1[68]\n getitem_1253 = _foreach_pow_1[69]\n getitem_1254 = _foreach_pow_1[70]\n getitem_1255 = _foreach_pow_1[71]\n getitem_1256 = _foreach_pow_1[72]\n getitem_1257 = _foreach_pow_1[73]\n getitem_1258 = _foreach_pow_1[74]\n getitem_1259 = _foreach_pow_1[75]\n getitem_1260 = _foreach_pow_1[76]\n getitem_1261 = _foreach_pow_1[77]\n getitem_1262 = _foreach_pow_1[78]\n getitem_1263 = _foreach_pow_1[79]\n getitem_1264 = _foreach_pow_1[80]\n getitem_1265 = _foreach_pow_1[81]\n getitem_1266 = _foreach_pow_1[82]\n getitem_1267 = _foreach_pow_1[83]\n getitem_1268 = _foreach_pow_1[84]\n getitem_1269 = _foreach_pow_1[85]\n getitem_1270 = _foreach_pow_1[86]\n getitem_1271 = _foreach_pow_1[87]\n getitem_1272 = _foreach_pow_1[88]\n getitem_1273 = _foreach_pow_1[89]\n getitem_1274 = _foreach_pow_1[90]\n getitem_1275 = _foreach_pow_1[91]\n getitem_1276 = _foreach_pow_1[92]\n getitem_1277 = _foreach_pow_1[93]\n getitem_1278 = _foreach_pow_1[94]\n getitem_1279 = _foreach_pow_1[95]\n getitem_1280 = _foreach_pow_1[96]\n getitem_1281 = _foreach_pow_1[97]\n getitem_1282 = _foreach_pow_1[98]\n getitem_1283 = _foreach_pow_1[99]\n getitem_1284 = _foreach_pow_1[100]\n getitem_1285 = _foreach_pow_1[101]\n getitem_1286 = _foreach_pow_1[102]\n getitem_1287 = _foreach_pow_1[103]\n getitem_1288 = _foreach_pow_1[104]\n getitem_1289 = _foreach_pow_1[105]\n getitem_1290 = _foreach_pow_1[106]\n getitem_1291 = _foreach_pow_1[107]\n getitem_1292 = _foreach_pow_1[108]\n getitem_1293 = _foreach_pow_1[109]\n getitem_1294 = _foreach_pow_1[110]\n getitem_1295 = _foreach_pow_1[111]\n getitem_1296 = _foreach_pow_1[112]\n getitem_1297 = _foreach_pow_1[113]\n getitem_1298 = _foreach_pow_1[114]\n getitem_1299 = _foreach_pow_1[115]\n getitem_1300 = _foreach_pow_1[116]\n getitem_1301 = _foreach_pow_1[117]\n getitem_1302 = _foreach_pow_1[118]\n getitem_1303 = _foreach_pow_1[119]\n getitem_1304 = _foreach_pow_1[120]\n getitem_1305 = _foreach_pow_1[121]\n getitem_1306 = _foreach_pow_1[122]\n getitem_1307 = _foreach_pow_1[123]\n getitem_1308 = _foreach_pow_1[124]\n getitem_1309 = _foreach_pow_1[125]\n getitem_1310 = _foreach_pow_1[126]\n getitem_1311 = _foreach_pow_1[127]\n getitem_1312 = _foreach_pow_1[128]\n getitem_1313 
= _foreach_pow_1[129]\n getitem_1314 = _foreach_pow_1[130]\n getitem_1315 = _foreach_pow_1[131]\n getitem_1316 = _foreach_pow_1[132]\n getitem_1317 = _foreach_pow_1[133]\n getitem_1318 = _foreach_pow_1[134]\n getitem_1319 = _foreach_pow_1[135]\n getitem_1320 = _foreach_pow_1[136]\n getitem_1321 = _foreach_pow_1[137]\n getitem_1322 = _foreach_pow_1[138]\n getitem_1323 = _foreach_pow_1[139]\n getitem_1324 = _foreach_pow_1[140]\n getitem_1325 = _foreach_pow_1[141]\n getitem_1326 = _foreach_pow_1[142]\n getitem_1327 = _foreach_pow_1[143]\n getitem_1328 = _foreach_pow_1[144]\n getitem_1329 = _foreach_pow_1[145]\n getitem_1330 = _foreach_pow_1[146]\n getitem_1331 = _foreach_pow_1[147]; _foreach_pow_1 = None\n _foreach_sub_1 = torch.ops.aten._foreach_sub.Scalar([getitem_1036, getitem_1037, getitem_1038, getitem_1039, getitem_1040, getitem_1041, getitem_1042, getitem_1043, getitem_1044, getitem_1045, getitem_1046, getitem_1047, getitem_1048, getitem_1049, getitem_1050, getitem_1051, getitem_1052, getitem_1053, getitem_1054, getitem_1055, getitem_1056, getitem_1057, getitem_1058, getitem_1059, getitem_1060, getitem_1061, getitem_1062, getitem_1063, getitem_1064, getitem_1065, getitem_1066, getitem_1067, getitem_1068, getitem_1069, getitem_1070, getitem_1071, getitem_1072, getitem_1073, getitem_1074, getitem_1075, getitem_1076, getitem_1077, getitem_1078, getitem_1079, getitem_1080, getitem_1081, getitem_1082, getitem_1083, getitem_1084, getitem_1085, getitem_1086, getitem_1087, getitem_1088, getitem_1089, getitem_1090, getitem_1091, getitem_1092, getitem_1093, getitem_1094, getitem_1095, getitem_1096, getitem_1097, getitem_1098, getitem_1099, getitem_1100, getitem_1101, getitem_1102, getitem_1103, getitem_1104, getitem_1105, getitem_1106, getitem_1107, getitem_1108, getitem_1109, getitem_1110, getitem_1111, getitem_1112, getitem_1113, getitem_1114, getitem_1115, getitem_1116, getitem_1117, getitem_1118, getitem_1119, getitem_1120, getitem_1121, getitem_1122, getitem_1123, getitem_1124, getitem_1125, getitem_1126, getitem_1127, getitem_1128, getitem_1129, getitem_1130, getitem_1131, getitem_1132, getitem_1133, getitem_1134, getitem_1135, getitem_1136, getitem_1137, getitem_1138, getitem_1139, getitem_1140, getitem_1141, getitem_1142, getitem_1143, getitem_1144, getitem_1145, getitem_1146, getitem_1147, getitem_1148, getitem_1149, getitem_1150, getitem_1151, getitem_1152, getitem_1153, getitem_1154, getitem_1155, getitem_1156, getitem_1157, getitem_1158, getitem_1159, getitem_1160, getitem_1161, getitem_1162, getitem_1163, getitem_1164, getitem_1165, getitem_1166, getitem_1167, getitem_1168, getitem_1169, getitem_1170, getitem_1171, getitem_1172, getitem_1173, getitem_1174, getitem_1175, getitem_1176, getitem_1177, getitem_1178, getitem_1179, getitem_1180, getitem_1181, getitem_1182, getitem_1183], 1); getitem_1036 = getitem_1037 = getitem_1038 = getitem_1039 = getitem_1040 = getitem_1041 = getitem_1042 = getitem_1043 = getitem_1044 = getitem_1045 = getitem_1046 = getitem_1047 = getitem_1048 = getitem_1049 = getitem_1050 = getitem_1051 = getitem_1052 = getitem_1053 = getitem_1054 = getitem_1055 = getitem_1056 = getitem_1057 = getitem_1058 = getitem_1059 = getitem_1060 = getitem_1061 = getitem_1062 = getitem_1063 = getitem_1064 = getitem_1065 = getitem_1066 = getitem_1067 = getitem_1068 = getitem_1069 = getitem_1070 = getitem_1071 = getitem_1072 = getitem_1073 = getitem_1074 = getitem_1075 = getitem_1076 = getitem_1077 = getitem_1078 = getitem_1079 = getitem_1080 = getitem_1081 = getitem_1082 = 
getitem_1083 = getitem_1084 = getitem_1085 = getitem_1086 = getitem_1087 = getitem_1088 = getitem_1089 = getitem_1090 = getitem_1091 = getitem_1092 = getitem_1093 = getitem_1094 = getitem_1095 = getitem_1096 = getitem_1097 = getitem_1098 = getitem_1099 = getitem_1100 = getitem_1101 = getitem_1102 = getitem_1103 = getitem_1104 = getitem_1105 = getitem_1106 = getitem_1107 = getitem_1108 = getitem_1109 = getitem_1110 = getitem_1111 = getitem_1112 = getitem_1113 = getitem_1114 = getitem_1115 = getitem_1116 = getitem_1117 = getitem_1118 = getitem_1119 = getitem_1120 = getitem_1121 = getitem_1122 = getitem_1123 = getitem_1124 = getitem_1125 = getitem_1126 = getitem_1127 = getitem_1128 = getitem_1129 = getitem_1130 = getitem_1131 = getitem_1132 = getitem_1133 = getitem_1134 = getitem_1135 = getitem_1136 = getitem_1137 = getitem_1138 = getitem_1139 = getitem_1140 = getitem_1141 = getitem_1142 = getitem_1143 = getitem_1144 = getitem_1145 = getitem_1146 = getitem_1147 = getitem_1148 = getitem_1149 = getitem_1150 = getitem_1151 = getitem_1152 = getitem_1153 = getitem_1154 = getitem_1155 = getitem_1156 = getitem_1157 = getitem_1158 = getitem_1159 = getitem_1160 = getitem_1161 = getitem_1162 = getitem_1163 = getitem_1164 = getitem_1165 = getitem_1166 = getitem_1167 = getitem_1168 = getitem_1169 = getitem_1170 = getitem_1171 = getitem_1172 = getitem_1173 = getitem_1174 = getitem_1175 = getitem_1176 = getitem_1177 = getitem_1178 = getitem_1179 = getitem_1180 = getitem_1181 = getitem_1182 = getitem_1183 = None\n getitem_1332 = _foreach_sub_1[0]\n getitem_1333 = _foreach_sub_1[1]\n getitem_1334 = _foreach_sub_1[2]\n getitem_1335 = _foreach_sub_1[3]\n getitem_1336 = _foreach_sub_1[4]\n getitem_1337 = _foreach_sub_1[5]\n getitem_1338 = _foreach_sub_1[6]\n getitem_1339 = _foreach_sub_1[7]\n getitem_1340 = _foreach_sub_1[8]\n getitem_1341 = _foreach_sub_1[9]\n getitem_1342 = _foreach_sub_1[10]\n getitem_1343 = _foreach_sub_1[11]\n getitem_1344 = _foreach_sub_1[12]\n getitem_1345 = _foreach_sub_1[13]\n getitem_1346 = _foreach_sub_1[14]\n getitem_1347 = _foreach_sub_1[15]\n getitem_1348 = _foreach_sub_1[16]\n getitem_1349 = _foreach_sub_1[17]\n getitem_1350 = _foreach_sub_1[18]\n getitem_1351 = _foreach_sub_1[19]\n getitem_1352 = _foreach_sub_1[20]\n getitem_1353 = _foreach_sub_1[21]\n getitem_1354 = _foreach_sub_1[22]\n getitem_1355 = _foreach_sub_1[23]\n getitem_1356 = _foreach_sub_1[24]\n getitem_1357 = _foreach_sub_1[25]\n getitem_1358 = _foreach_sub_1[26]\n getitem_1359 = _foreach_sub_1[27]\n getitem_1360 = _foreach_sub_1[28]\n getitem_1361 = _foreach_sub_1[29]\n getitem_1362 = _foreach_sub_1[30]\n getitem_1363 = _foreach_sub_1[31]\n getitem_1364 = _foreach_sub_1[32]\n getitem_1365 = _foreach_sub_1[33]\n getitem_1366 = _foreach_sub_1[34]\n getitem_1367 = _foreach_sub_1[35]\n getitem_1368 = _foreach_sub_1[36]\n getitem_1369 = _foreach_sub_1[37]\n getitem_1370 = _foreach_sub_1[38]\n getitem_1371 = _foreach_sub_1[39]\n getitem_1372 = _foreach_sub_1[40]\n getitem_1373 = _foreach_sub_1[41]\n getitem_1374 = _foreach_sub_1[42]\n getitem_1375 = _foreach_sub_1[43]\n getitem_1376 = _foreach_sub_1[44]\n getitem_1377 = _foreach_sub_1[45]\n getitem_1378 = _foreach_sub_1[46]\n getitem_1379 = _foreach_sub_1[47]\n getitem_1380 = _foreach_sub_1[48]\n getitem_1381 = _foreach_sub_1[49]\n getitem_1382 = _foreach_sub_1[50]\n getitem_1383 = _foreach_sub_1[51]\n getitem_1384 = _foreach_sub_1[52]\n getitem_1385 = _foreach_sub_1[53]\n getitem_1386 = _foreach_sub_1[54]\n getitem_1387 = _foreach_sub_1[55]\n getitem_1388 = 
_foreach_sub_1[56]\n getitem_1389 = _foreach_sub_1[57]\n getitem_1390 = _foreach_sub_1[58]\n getitem_1391 = _foreach_sub_1[59]\n getitem_1392 = _foreach_sub_1[60]\n getitem_1393 = _foreach_sub_1[61]\n getitem_1394 = _foreach_sub_1[62]\n getitem_1395 = _foreach_sub_1[63]\n getitem_1396 = _foreach_sub_1[64]\n getitem_1397 = _foreach_sub_1[65]\n getitem_1398 = _foreach_sub_1[66]\n getitem_1399 = _foreach_sub_1[67]\n getitem_1400 = _foreach_sub_1[68]\n getitem_1401 = _foreach_sub_1[69]\n getitem_1402 = _foreach_sub_1[70]\n getitem_1403 = _foreach_sub_1[71]\n getitem_1404 = _foreach_sub_1[72]\n getitem_1405 = _foreach_sub_1[73]\n getitem_1406 = _foreach_sub_1[74]\n getitem_1407 = _foreach_sub_1[75]\n getitem_1408 = _foreach_sub_1[76]\n getitem_1409 = _foreach_sub_1[77]\n getitem_1410 = _foreach_sub_1[78]\n getitem_1411 = _foreach_sub_1[79]\n getitem_1412 = _foreach_sub_1[80]\n getitem_1413 = _foreach_sub_1[81]\n getitem_1414 = _foreach_sub_1[82]\n getitem_1415 = _foreach_sub_1[83]\n getitem_1416 = _foreach_sub_1[84]\n getitem_1417 = _foreach_sub_1[85]\n getitem_1418 = _foreach_sub_1[86]\n getitem_1419 = _foreach_sub_1[87]\n getitem_1420 = _foreach_sub_1[88]\n getitem_1421 = _foreach_sub_1[89]\n getitem_1422 = _foreach_sub_1[90]\n getitem_1423 = _foreach_sub_1[91]\n getitem_1424 = _foreach_sub_1[92]\n getitem_1425 = _foreach_sub_1[93]\n getitem_1426 = _foreach_sub_1[94]\n getitem_1427 = _foreach_sub_1[95]\n getitem_1428 = _foreach_sub_1[96]\n getitem_1429 = _foreach_sub_1[97]\n getitem_1430 = _foreach_sub_1[98]\n getitem_1431 = _foreach_sub_1[99]\n getitem_1432 = _foreach_sub_1[100]\n getitem_1433 = _foreach_sub_1[101]\n getitem_1434 = _foreach_sub_1[102]\n getitem_1435 = _foreach_sub_1[103]\n getitem_1436 = _foreach_sub_1[104]\n getitem_1437 = _foreach_sub_1[105]\n getitem_1438 = _foreach_sub_1[106]\n getitem_1439 = _foreach_sub_1[107]\n getitem_1440 = _foreach_sub_1[108]\n getitem_1441 = _foreach_sub_1[109]\n getitem_1442 = _foreach_sub_1[110]\n getitem_1443 = _foreach_sub_1[111]\n getitem_1444 = _foreach_sub_1[112]\n getitem_1445 = _foreach_sub_1[113]\n getitem_1446 = _foreach_sub_1[114]\n getitem_1447 = _foreach_sub_1[115]\n getitem_1448 = _foreach_sub_1[116]\n getitem_1449 = _foreach_sub_1[117]\n getitem_1450 = _foreach_sub_1[118]\n getitem_1451 = _foreach_sub_1[119]\n getitem_1452 = _foreach_sub_1[120]\n getitem_1453 = _foreach_sub_1[121]\n getitem_1454 = _foreach_sub_1[122]\n getitem_1455 = _foreach_sub_1[123]\n getitem_1456 = _foreach_sub_1[124]\n getitem_1457 = _foreach_sub_1[125]\n getitem_1458 = _foreach_sub_1[126]\n getitem_1459 = _foreach_sub_1[127]\n getitem_1460 = _foreach_sub_1[128]\n getitem_1461 = _foreach_sub_1[129]\n getitem_1462 = _foreach_sub_1[130]\n getitem_1463 = _foreach_sub_1[131]\n getitem_1464 = _foreach_sub_1[132]\n getitem_1465 = _foreach_sub_1[133]\n getitem_1466 = _foreach_sub_1[134]\n getitem_1467 = _foreach_sub_1[135]\n getitem_1468 = _foreach_sub_1[136]\n getitem_1469 = _foreach_sub_1[137]\n getitem_1470 = _foreach_sub_1[138]\n getitem_1471 = _foreach_sub_1[139]\n getitem_1472 = _foreach_sub_1[140]\n getitem_1473 = _foreach_sub_1[141]\n getitem_1474 = _foreach_sub_1[142]\n getitem_1475 = _foreach_sub_1[143]\n getitem_1476 = _foreach_sub_1[144]\n getitem_1477 = _foreach_sub_1[145]\n getitem_1478 = _foreach_sub_1[146]\n getitem_1479 = _foreach_sub_1[147]; _foreach_sub_1 = None\n _foreach_sub_2 = torch.ops.aten._foreach_sub.Scalar([getitem_1184, getitem_1185, getitem_1186, getitem_1187, getitem_1188, getitem_1189, getitem_1190, getitem_1191, getitem_1192, 
getitem_1193, getitem_1194, getitem_1195, getitem_1196, getitem_1197, getitem_1198, getitem_1199, getitem_1200, getitem_1201, getitem_1202, getitem_1203, getitem_1204, getitem_1205, getitem_1206, getitem_1207, getitem_1208, getitem_1209, getitem_1210, getitem_1211, getitem_1212, getitem_1213, getitem_1214, getitem_1215, getitem_1216, getitem_1217, getitem_1218, getitem_1219, getitem_1220, getitem_1221, getitem_1222, getitem_1223, getitem_1224, getitem_1225, getitem_1226, getitem_1227, getitem_1228, getitem_1229, getitem_1230, getitem_1231, getitem_1232, getitem_1233, getitem_1234, getitem_1235, getitem_1236, getitem_1237, getitem_1238, getitem_1239, getitem_1240, getitem_1241, getitem_1242, getitem_1243, getitem_1244, getitem_1245, getitem_1246, getitem_1247, getitem_1248, getitem_1249, getitem_1250, getitem_1251, getitem_1252, getitem_1253, getitem_1254, getitem_1255, getitem_1256, getitem_1257, getitem_1258, getitem_1259, getitem_1260, getitem_1261, getitem_1262, getitem_1263, getitem_1264, getitem_1265, getitem_1266, getitem_1267, getitem_1268, getitem_1269, getitem_1270, getitem_1271, getitem_1272, getitem_1273, getitem_1274, getitem_1275, getitem_1276, getitem_1277, getitem_1278, getitem_1279, getitem_1280, getitem_1281, getitem_1282, getitem_1283, getitem_1284, getitem_1285, getitem_1286, getitem_1287, getitem_1288, getitem_1289, getitem_1290, getitem_1291, getitem_1292, getitem_1293, getitem_1294, getitem_1295, getitem_1296, getitem_1297, getitem_1298, getitem_1299, getitem_1300, getitem_1301, getitem_1302, getitem_1303, getitem_1304, getitem_1305, getitem_1306, getitem_1307, getitem_1308, getitem_1309, getitem_1310, getitem_1311, getitem_1312, getitem_1313, getitem_1314, getitem_1315, getitem_1316, getitem_1317, getitem_1318, getitem_1319, getitem_1320, getitem_1321, getitem_1322, getitem_1323, getitem_1324, getitem_1325, getitem_1326, getitem_1327, getitem_1328, getitem_1329, getitem_1330, getitem_1331], 1); getitem_1184 = getitem_1185 = getitem_1186 = getitem_1187 = getitem_1188 = getitem_1189 = getitem_1190 = getitem_1191 = getitem_1192 = getitem_1193 = getitem_1194 = getitem_1195 = getitem_1196 = getitem_1197 = getitem_1198 = getitem_1199 = getitem_1200 = getitem_1201 = getitem_1202 = getitem_1203 = getitem_1204 = getitem_1205 = getitem_1206 = getitem_1207 = getitem_1208 = getitem_1209 = getitem_1210 = getitem_1211 = getitem_1212 = getitem_1213 = getitem_1214 = getitem_1215 = getitem_1216 = getitem_1217 = getitem_1218 = getitem_1219 = getitem_1220 = getitem_1221 = getitem_1222 = getitem_1223 = getitem_1224 = getitem_1225 = getitem_1226 = getitem_1227 = getitem_1228 = getitem_1229 = getitem_1230 = getitem_1231 = getitem_1232 = getitem_1233 = getitem_1234 = getitem_1235 = getitem_1236 = getitem_1237 = getitem_1238 = getitem_1239 = getitem_1240 = getitem_1241 = getitem_1242 = getitem_1243 = getitem_1244 = getitem_1245 = getitem_1246 = getitem_1247 = getitem_1248 = getitem_1249 = getitem_1250 = getitem_1251 = getitem_1252 = getitem_1253 = getitem_1254 = getitem_1255 = getitem_1256 = getitem_1257 = getitem_1258 = getitem_1259 = getitem_1260 = getitem_1261 = getitem_1262 = getitem_1263 = getitem_1264 = getitem_1265 = getitem_1266 = getitem_1267 = getitem_1268 = getitem_1269 = getitem_1270 = getitem_1271 = getitem_1272 = getitem_1273 = getitem_1274 = getitem_1275 = getitem_1276 = getitem_1277 = getitem_1278 = getitem_1279 = getitem_1280 = getitem_1281 = getitem_1282 = getitem_1283 = getitem_1284 = getitem_1285 = getitem_1286 = getitem_1287 = getitem_1288 = getitem_1289 = getitem_1290 
= getitem_1291 = getitem_1292 = getitem_1293 = getitem_1294 = getitem_1295 = getitem_1296 = getitem_1297 = getitem_1298 = getitem_1299 = getitem_1300 = getitem_1301 = getitem_1302 = getitem_1303 = getitem_1304 = getitem_1305 = getitem_1306 = getitem_1307 = getitem_1308 = getitem_1309 = getitem_1310 = getitem_1311 = getitem_1312 = getitem_1313 = getitem_1314 = getitem_1315 = getitem_1316 = getitem_1317 = getitem_1318 = getitem_1319 = getitem_1320 = getitem_1321 = getitem_1322 = getitem_1323 = getitem_1324 = getitem_1325 = getitem_1326 = getitem_1327 = getitem_1328 = getitem_1329 = getitem_1330 = getitem_1331 = None\n getitem_1480 = _foreach_sub_2[0]\n getitem_1481 = _foreach_sub_2[1]\n getitem_1482 = _foreach_sub_2[2]\n getitem_1483 = _foreach_sub_2[3]\n getitem_1484 = _foreach_sub_2[4]\n getitem_1485 = _foreach_sub_2[5]\n getitem_1486 = _foreach_sub_2[6]\n getitem_1487 = _foreach_sub_2[7]\n getitem_1488 = _foreach_sub_2[8]\n getitem_1489 = _foreach_sub_2[9]\n getitem_1490 = _foreach_sub_2[10]\n getitem_1491 = _foreach_sub_2[11]\n getitem_1492 = _foreach_sub_2[12]\n getitem_1493 = _foreach_sub_2[13]\n getitem_1494 = _foreach_sub_2[14]\n getitem_1495 = _foreach_sub_2[15]\n getitem_1496 = _foreach_sub_2[16]\n getitem_1497 = _foreach_sub_2[17]\n getitem_1498 = _foreach_sub_2[18]\n getitem_1499 = _foreach_sub_2[19]\n getitem_1500 = _foreach_sub_2[20]\n getitem_1501 = _foreach_sub_2[21]\n getitem_1502 = _foreach_sub_2[22]\n getitem_1503 = _foreach_sub_2[23]\n getitem_1504 = _foreach_sub_2[24]\n getitem_1505 = _foreach_sub_2[25]\n getitem_1506 = _foreach_sub_2[26]\n getitem_1507 = _foreach_sub_2[27]\n getitem_1508 = _foreach_sub_2[28]\n getitem_1509 = _foreach_sub_2[29]\n getitem_1510 = _foreach_sub_2[30]\n getitem_1511 = _foreach_sub_2[31]\n getitem_1512 = _foreach_sub_2[32]\n getitem_1513 = _foreach_sub_2[33]\n getitem_1514 = _foreach_sub_2[34]\n getitem_1515 = _foreach_sub_2[35]\n getitem_1516 = _foreach_sub_2[36]\n getitem_1517 = _foreach_sub_2[37]\n getitem_1518 = _foreach_sub_2[38]\n getitem_1519 = _foreach_sub_2[39]\n getitem_1520 = _foreach_sub_2[40]\n getitem_1521 = _foreach_sub_2[41]\n getitem_1522 = _foreach_sub_2[42]\n getitem_1523 = _foreach_sub_2[43]\n getitem_1524 = _foreach_sub_2[44]\n getitem_1525 = _foreach_sub_2[45]\n getitem_1526 = _foreach_sub_2[46]\n getitem_1527 = _foreach_sub_2[47]\n getitem_1528 = _foreach_sub_2[48]\n getitem_1529 = _foreach_sub_2[49]\n getitem_1530 = _foreach_sub_2[50]\n getitem_1531 = _foreach_sub_2[51]\n getitem_1532 = _foreach_sub_2[52]\n getitem_1533 = _foreach_sub_2[53]\n getitem_1534 = _foreach_sub_2[54]\n getitem_1535 = _foreach_sub_2[55]\n getitem_1536 = _foreach_sub_2[56]\n getitem_1537 = _foreach_sub_2[57]\n getitem_1538 = _foreach_sub_2[58]\n getitem_1539 = _foreach_sub_2[59]\n getitem_1540 = _foreach_sub_2[60]\n getitem_1541 = _foreach_sub_2[61]\n getitem_1542 = _foreach_sub_2[62]\n getitem_1543 = _foreach_sub_2[63]\n getitem_1544 = _foreach_sub_2[64]\n getitem_1545 = _foreach_sub_2[65]\n getitem_1546 = _foreach_sub_2[66]\n getitem_1547 = _foreach_sub_2[67]\n getitem_1548 = _foreach_sub_2[68]\n getitem_1549 = _foreach_sub_2[69]\n getitem_1550 = _foreach_sub_2[70]\n getitem_1551 = _foreach_sub_2[71]\n getitem_1552 = _foreach_sub_2[72]\n getitem_1553 = _foreach_sub_2[73]\n getitem_1554 = _foreach_sub_2[74]\n getitem_1555 = _foreach_sub_2[75]\n getitem_1556 = _foreach_sub_2[76]\n getitem_1557 = _foreach_sub_2[77]\n getitem_1558 = _foreach_sub_2[78]\n getitem_1559 = _foreach_sub_2[79]\n getitem_1560 = _foreach_sub_2[80]\n getitem_1561 = 
_foreach_sub_2[81]\n getitem_1562 = _foreach_sub_2[82]\n getitem_1563 = _foreach_sub_2[83]\n getitem_1564 = _foreach_sub_2[84]\n getitem_1565 = _foreach_sub_2[85]\n getitem_1566 = _foreach_sub_2[86]\n getitem_1567 = _foreach_sub_2[87]\n getitem_1568 = _foreach_sub_2[88]\n getitem_1569 = _foreach_sub_2[89]\n getitem_1570 = _foreach_sub_2[90]\n getitem_1571 = _foreach_sub_2[91]\n getitem_1572 = _foreach_sub_2[92]\n getitem_1573 = _foreach_sub_2[93]\n getitem_1574 = _foreach_sub_2[94]\n getitem_1575 = _foreach_sub_2[95]\n getitem_1576 = _foreach_sub_2[96]\n getitem_1577 = _foreach_sub_2[97]\n getitem_1578 = _foreach_sub_2[98]\n getitem_1579 = _foreach_sub_2[99]\n getitem_1580 = _foreach_sub_2[100]\n getitem_1581 = _foreach_sub_2[101]\n getitem_1582 = _foreach_sub_2[102]\n getitem_1583 = _foreach_sub_2[103]\n getitem_1584 = _foreach_sub_2[104]\n getitem_1585 = _foreach_sub_2[105]\n getitem_1586 = _foreach_sub_2[106]\n getitem_1587 = _foreach_sub_2[107]\n getitem_1588 = _foreach_sub_2[108]\n getitem_1589 = _foreach_sub_2[109]\n getitem_1590 = _foreach_sub_2[110]\n getitem_1591 = _foreach_sub_2[111]\n getitem_1592 = _foreach_sub_2[112]\n getitem_1593 = _foreach_sub_2[113]\n getitem_1594 = _foreach_sub_2[114]\n getitem_1595 = _foreach_sub_2[115]\n getitem_1596 = _foreach_sub_2[116]\n getitem_1597 = _foreach_sub_2[117]\n getitem_1598 = _foreach_sub_2[118]\n getitem_1599 = _foreach_sub_2[119]\n getitem_1600 = _foreach_sub_2[120]\n getitem_1601 = _foreach_sub_2[121]\n getitem_1602 = _foreach_sub_2[122]\n getitem_1603 = _foreach_sub_2[123]\n getitem_1604 = _foreach_sub_2[124]\n getitem_1605 = _foreach_sub_2[125]\n getitem_1606 = _foreach_sub_2[126]\n getitem_1607 = _foreach_sub_2[127]\n getitem_1608 = _foreach_sub_2[128]\n getitem_1609 = _foreach_sub_2[129]\n getitem_1610 = _foreach_sub_2[130]\n getitem_1611 = _foreach_sub_2[131]\n getitem_1612 = _foreach_sub_2[132]\n getitem_1613 = _foreach_sub_2[133]\n getitem_1614 = _foreach_sub_2[134]\n getitem_1615 = _foreach_sub_2[135]\n getitem_1616 = _foreach_sub_2[136]\n getitem_1617 = _foreach_sub_2[137]\n getitem_1618 = _foreach_sub_2[138]\n getitem_1619 = _foreach_sub_2[139]\n getitem_1620 = _foreach_sub_2[140]\n getitem_1621 = _foreach_sub_2[141]\n getitem_1622 = _foreach_sub_2[142]\n getitem_1623 = _foreach_sub_2[143]\n getitem_1624 = _foreach_sub_2[144]\n getitem_1625 = _foreach_sub_2[145]\n getitem_1626 = _foreach_sub_2[146]\n getitem_1627 = _foreach_sub_2[147]; _foreach_sub_2 = None\n _foreach_neg = torch.ops.aten._foreach_neg.default([getitem_1480, getitem_1481, getitem_1482, getitem_1483, getitem_1484, getitem_1485, getitem_1486, getitem_1487, getitem_1488, getitem_1489, getitem_1490, getitem_1491, getitem_1492, getitem_1493, getitem_1494, getitem_1495, getitem_1496, getitem_1497, getitem_1498, getitem_1499, getitem_1500, getitem_1501, getitem_1502, getitem_1503, getitem_1504, getitem_1505, getitem_1506, getitem_1507, getitem_1508, getitem_1509, getitem_1510, getitem_1511, getitem_1512, getitem_1513, getitem_1514, getitem_1515, getitem_1516, getitem_1517, getitem_1518, getitem_1519, getitem_1520, getitem_1521, getitem_1522, getitem_1523, getitem_1524, getitem_1525, getitem_1526, getitem_1527, getitem_1528, getitem_1529, getitem_1530, getitem_1531, getitem_1532, getitem_1533, getitem_1534, getitem_1535, getitem_1536, getitem_1537, getitem_1538, getitem_1539, getitem_1540, getitem_1541, getitem_1542, getitem_1543, getitem_1544, getitem_1545, getitem_1546, getitem_1547, getitem_1548, getitem_1549, getitem_1550, getitem_1551, getitem_1552, 
getitem_1553, getitem_1554, getitem_1555, getitem_1556, getitem_1557, getitem_1558, getitem_1559, getitem_1560, getitem_1561, getitem_1562, getitem_1563, getitem_1564, getitem_1565, getitem_1566, getitem_1567, getitem_1568, getitem_1569, getitem_1570, getitem_1571, getitem_1572, getitem_1573, getitem_1574, getitem_1575, getitem_1576, getitem_1577, getitem_1578, getitem_1579, getitem_1580, getitem_1581, getitem_1582, getitem_1583, getitem_1584, getitem_1585, getitem_1586, getitem_1587, getitem_1588, getitem_1589, getitem_1590, getitem_1591, getitem_1592, getitem_1593, getitem_1594, getitem_1595, getitem_1596, getitem_1597, getitem_1598, getitem_1599, getitem_1600, getitem_1601, getitem_1602, getitem_1603, getitem_1604, getitem_1605, getitem_1606, getitem_1607, getitem_1608, getitem_1609, getitem_1610, getitem_1611, getitem_1612, getitem_1613, getitem_1614, getitem_1615, getitem_1616, getitem_1617, getitem_1618, getitem_1619, getitem_1620, getitem_1621, getitem_1622, getitem_1623, getitem_1624, getitem_1625, getitem_1626, getitem_1627]); getitem_1480 = getitem_1481 = getitem_1482 = getitem_1483 = getitem_1484 = getitem_1485 = getitem_1486 = getitem_1487 = getitem_1488 = getitem_1489 = getitem_1490 = getitem_1491 = getitem_1492 = getitem_1493 = getitem_1494 = getitem_1495 = getitem_1496 = getitem_1497 = getitem_1498 = getitem_1499 = getitem_1500 = getitem_1501 = getitem_1502 = getitem_1503 = getitem_1504 = getitem_1505 = getitem_1506 = getitem_1507 = getitem_1508 = getitem_1509 = getitem_1510 = getitem_1511 = getitem_1512 = getitem_1513 = getitem_1514 = getitem_1515 = getitem_1516 = getitem_1517 = getitem_1518 = getitem_1519 = getitem_1520 = getitem_1521 = getitem_1522 = getitem_1523 = getitem_1524 = getitem_1525 = getitem_1526 = getitem_1527 = getitem_1528 = getitem_1529 = getitem_1530 = getitem_1531 = getitem_1532 = getitem_1533 = getitem_1534 = getitem_1535 = getitem_1536 = getitem_1537 = getitem_1538 = getitem_1539 = getitem_1540 = getitem_1541 = getitem_1542 = getitem_1543 = getitem_1544 = getitem_1545 = getitem_1546 = getitem_1547 = getitem_1548 = getitem_1549 = getitem_1550 = getitem_1551 = getitem_1552 = getitem_1553 = getitem_1554 = getitem_1555 = getitem_1556 = getitem_1557 = getitem_1558 = getitem_1559 = getitem_1560 = getitem_1561 = getitem_1562 = getitem_1563 = getitem_1564 = getitem_1565 = getitem_1566 = getitem_1567 = getitem_1568 = getitem_1569 = getitem_1570 = getitem_1571 = getitem_1572 = getitem_1573 = getitem_1574 = getitem_1575 = getitem_1576 = getitem_1577 = getitem_1578 = getitem_1579 = getitem_1580 = getitem_1581 = getitem_1582 = getitem_1583 = getitem_1584 = getitem_1585 = getitem_1586 = getitem_1587 = getitem_1588 = getitem_1589 = getitem_1590 = getitem_1591 = getitem_1592 = getitem_1593 = getitem_1594 = getitem_1595 = getitem_1596 = getitem_1597 = getitem_1598 = getitem_1599 = getitem_1600 = getitem_1601 = getitem_1602 = getitem_1603 = getitem_1604 = getitem_1605 = getitem_1606 = getitem_1607 = getitem_1608 = getitem_1609 = getitem_1610 = getitem_1611 = getitem_1612 = getitem_1613 = getitem_1614 = getitem_1615 = getitem_1616 = getitem_1617 = getitem_1618 = getitem_1619 = getitem_1620 = getitem_1621 = getitem_1622 = getitem_1623 = getitem_1624 = getitem_1625 = getitem_1626 = getitem_1627 = None\n getitem_1628 = _foreach_neg[0]\n getitem_1629 = _foreach_neg[1]\n getitem_1630 = _foreach_neg[2]\n getitem_1631 = _foreach_neg[3]\n getitem_1632 = _foreach_neg[4]\n getitem_1633 = _foreach_neg[5]\n getitem_1634 = _foreach_neg[6]\n getitem_1635 = _foreach_neg[7]\n 
getitem_1636 = _foreach_neg[8]\n getitem_1637 = _foreach_neg[9]\n getitem_1638 = _foreach_neg[10]\n getitem_1639 = _foreach_neg[11]\n getitem_1640 = _foreach_neg[12]\n getitem_1641 = _foreach_neg[13]\n getitem_1642 = _foreach_neg[14]\n getitem_1643 = _foreach_neg[15]\n getitem_1644 = _foreach_neg[16]\n getitem_1645 = _foreach_neg[17]\n getitem_1646 = _foreach_neg[18]\n getitem_1647 = _foreach_neg[19]\n getitem_1648 = _foreach_neg[20]\n getitem_1649 = _foreach_neg[21]\n getitem_1650 = _foreach_neg[22]\n getitem_1651 = _foreach_neg[23]\n getitem_1652 = _foreach_neg[24]\n getitem_1653 = _foreach_neg[25]\n getitem_1654 = _foreach_neg[26]\n getitem_1655 = _foreach_neg[27]\n getitem_1656 = _foreach_neg[28]\n getitem_1657 = _foreach_neg[29]\n getitem_1658 = _foreach_neg[30]\n getitem_1659 = _foreach_neg[31]\n getitem_1660 = _foreach_neg[32]\n getitem_1661 = _foreach_neg[33]\n getitem_1662 = _foreach_neg[34]\n getitem_1663 = _foreach_neg[35]\n getitem_1664 = _foreach_neg[36]\n getitem_1665 = _foreach_neg[37]\n getitem_1666 = _foreach_neg[38]\n getitem_1667 = _foreach_neg[39]\n getitem_1668 = _foreach_neg[40]\n getitem_1669 = _foreach_neg[41]\n getitem_1670 = _foreach_neg[42]\n getitem_1671 = _foreach_neg[43]\n getitem_1672 = _foreach_neg[44]\n getitem_1673 = _foreach_neg[45]\n getitem_1674 = _foreach_neg[46]\n getitem_1675 = _foreach_neg[47]\n getitem_1676 = _foreach_neg[48]\n getitem_1677 = _foreach_neg[49]\n getitem_1678 = _foreach_neg[50]\n getitem_1679 = _foreach_neg[51]\n getitem_1680 = _foreach_neg[52]\n getitem_1681 = _foreach_neg[53]\n getitem_1682 = _foreach_neg[54]\n getitem_1683 = _foreach_neg[55]\n getitem_1684 = _foreach_neg[56]\n getitem_1685 = _foreach_neg[57]\n getitem_1686 = _foreach_neg[58]\n getitem_1687 = _foreach_neg[59]\n getitem_1688 = _foreach_neg[60]\n getitem_1689 = _foreach_neg[61]\n getitem_1690 = _foreach_neg[62]\n getitem_1691 = _foreach_neg[63]\n getitem_1692 = _foreach_neg[64]\n getitem_1693 = _foreach_neg[65]\n getitem_1694 = _foreach_neg[66]\n getitem_1695 = _foreach_neg[67]\n getitem_1696 = _foreach_neg[68]\n getitem_1697 = _foreach_neg[69]\n getitem_1698 = _foreach_neg[70]\n getitem_1699 = _foreach_neg[71]\n getitem_1700 = _foreach_neg[72]\n getitem_1701 = _foreach_neg[73]\n getitem_1702 = _foreach_neg[74]\n getitem_1703 = _foreach_neg[75]\n getitem_1704 = _foreach_neg[76]\n getitem_1705 = _foreach_neg[77]\n getitem_1706 = _foreach_neg[78]\n getitem_1707 = _foreach_neg[79]\n getitem_1708 = _foreach_neg[80]\n getitem_1709 = _foreach_neg[81]\n getitem_1710 = _foreach_neg[82]\n getitem_1711 = _foreach_neg[83]\n getitem_1712 = _foreach_neg[84]\n getitem_1713 = _foreach_neg[85]\n getitem_1714 = _foreach_neg[86]\n getitem_1715 = _foreach_neg[87]\n getitem_1716 = _foreach_neg[88]\n getitem_1717 = _foreach_neg[89]\n getitem_1718 = _foreach_neg[90]\n getitem_1719 = _foreach_neg[91]\n getitem_1720 = _foreach_neg[92]\n getitem_1721 = _foreach_neg[93]\n getitem_1722 = _foreach_neg[94]\n getitem_1723 = _foreach_neg[95]\n getitem_1724 = _foreach_neg[96]\n getitem_1725 = _foreach_neg[97]\n getitem_1726 = _foreach_neg[98]\n getitem_1727 = _foreach_neg[99]\n getitem_1728 = _foreach_neg[100]\n getitem_1729 = _foreach_neg[101]\n getitem_1730 = _foreach_neg[102]\n getitem_1731 = _foreach_neg[103]\n getitem_1732 = _foreach_neg[104]\n getitem_1733 = _foreach_neg[105]\n getitem_1734 = _foreach_neg[106]\n getitem_1735 = _foreach_neg[107]\n getitem_1736 = _foreach_neg[108]\n getitem_1737 = _foreach_neg[109]\n getitem_1738 = _foreach_neg[110]\n getitem_1739 = _foreach_neg[111]\n 
getitem_1740 = _foreach_neg[112]\n getitem_1741 = _foreach_neg[113]\n getitem_1742 = _foreach_neg[114]\n getitem_1743 = _foreach_neg[115]\n getitem_1744 = _foreach_neg[116]\n getitem_1745 = _foreach_neg[117]\n getitem_1746 = _foreach_neg[118]\n getitem_1747 = _foreach_neg[119]\n getitem_1748 = _foreach_neg[120]\n getitem_1749 = _foreach_neg[121]\n getitem_1750 = _foreach_neg[122]\n getitem_1751 = _foreach_neg[123]\n getitem_1752 = _foreach_neg[124]\n getitem_1753 = _foreach_neg[125]\n getitem_1754 = _foreach_neg[126]\n getitem_1755 = _foreach_neg[127]\n getitem_1756 = _foreach_neg[128]\n getitem_1757 = _foreach_neg[129]\n getitem_1758 = _foreach_neg[130]\n getitem_1759 = _foreach_neg[131]\n getitem_1760 = _foreach_neg[132]\n getitem_1761 = _foreach_neg[133]\n getitem_1762 = _foreach_neg[134]\n getitem_1763 = _foreach_neg[135]\n getitem_1764 = _foreach_neg[136]\n getitem_1765 = _foreach_neg[137]\n getitem_1766 = _foreach_neg[138]\n getitem_1767 = _foreach_neg[139]\n getitem_1768 = _foreach_neg[140]\n getitem_1769 = _foreach_neg[141]\n getitem_1770 = _foreach_neg[142]\n getitem_1771 = _foreach_neg[143]\n getitem_1772 = _foreach_neg[144]\n getitem_1773 = _foreach_neg[145]\n getitem_1774 = _foreach_neg[146]\n getitem_1775 = _foreach_neg[147]; _foreach_neg = None\n _foreach_div = torch.ops.aten._foreach_div.Scalar([getitem_1332, getitem_1333, getitem_1334, getitem_1335, getitem_1336, getitem_1337, getitem_1338, getitem_1339, getitem_1340, getitem_1341, getitem_1342, getitem_1343, getitem_1344, getitem_1345, getitem_1346, getitem_1347, getitem_1348, getitem_1349, getitem_1350, getitem_1351, getitem_1352, getitem_1353, getitem_1354, getitem_1355, getitem_1356, getitem_1357, getitem_1358, getitem_1359, getitem_1360, getitem_1361, getitem_1362, getitem_1363, getitem_1364, getitem_1365, getitem_1366, getitem_1367, getitem_1368, getitem_1369, getitem_1370, getitem_1371, getitem_1372, getitem_1373, getitem_1374, getitem_1375, getitem_1376, getitem_1377, getitem_1378, getitem_1379, getitem_1380, getitem_1381, getitem_1382, getitem_1383, getitem_1384, getitem_1385, getitem_1386, getitem_1387, getitem_1388, getitem_1389, getitem_1390, getitem_1391, getitem_1392, getitem_1393, getitem_1394, getitem_1395, getitem_1396, getitem_1397, getitem_1398, getitem_1399, getitem_1400, getitem_1401, getitem_1402, getitem_1403, getitem_1404, getitem_1405, getitem_1406, getitem_1407, getitem_1408, getitem_1409, getitem_1410, getitem_1411, getitem_1412, getitem_1413, getitem_1414, getitem_1415, getitem_1416, getitem_1417, getitem_1418, getitem_1419, getitem_1420, getitem_1421, getitem_1422, getitem_1423, getitem_1424, getitem_1425, getitem_1426, getitem_1427, getitem_1428, getitem_1429, getitem_1430, getitem_1431, getitem_1432, getitem_1433, getitem_1434, getitem_1435, getitem_1436, getitem_1437, getitem_1438, getitem_1439, getitem_1440, getitem_1441, getitem_1442, getitem_1443, getitem_1444, getitem_1445, getitem_1446, getitem_1447, getitem_1448, getitem_1449, getitem_1450, getitem_1451, getitem_1452, getitem_1453, getitem_1454, getitem_1455, getitem_1456, getitem_1457, getitem_1458, getitem_1459, getitem_1460, getitem_1461, getitem_1462, getitem_1463, getitem_1464, getitem_1465, getitem_1466, getitem_1467, getitem_1468, getitem_1469, getitem_1470, getitem_1471, getitem_1472, getitem_1473, getitem_1474, getitem_1475, getitem_1476, getitem_1477, getitem_1478, getitem_1479], 0.01); getitem_1332 = getitem_1333 = getitem_1334 = getitem_1335 = getitem_1336 = getitem_1337 = getitem_1338 = getitem_1339 = getitem_1340 = 
getitem_1341 = getitem_1342 = getitem_1343 = getitem_1344 = getitem_1345 = getitem_1346 = getitem_1347 = getitem_1348 = getitem_1349 = getitem_1350 = getitem_1351 = getitem_1352 = getitem_1353 = getitem_1354 = getitem_1355 = getitem_1356 = getitem_1357 = getitem_1358 = getitem_1359 = getitem_1360 = getitem_1361 = getitem_1362 = getitem_1363 = getitem_1364 = getitem_1365 = getitem_1366 = getitem_1367 = getitem_1368 = getitem_1369 = getitem_1370 = getitem_1371 = getitem_1372 = getitem_1373 = getitem_1374 = getitem_1375 = getitem_1376 = getitem_1377 = getitem_1378 = getitem_1379 = getitem_1380 = getitem_1381 = getitem_1382 = getitem_1383 = getitem_1384 = getitem_1385 = getitem_1386 = getitem_1387 = getitem_1388 = getitem_1389 = getitem_1390 = getitem_1391 = getitem_1392 = getitem_1393 = getitem_1394 = getitem_1395 = getitem_1396 = getitem_1397 = getitem_1398 = getitem_1399 = getitem_1400 = getitem_1401 = getitem_1402 = getitem_1403 = getitem_1404 = getitem_1405 = getitem_1406 = getitem_1407 = getitem_1408 = getitem_1409 = getitem_1410 = getitem_1411 = getitem_1412 = getitem_1413 = getitem_1414 = getitem_1415 = getitem_1416 = getitem_1417 = getitem_1418 = getitem_1419 = getitem_1420 = getitem_1421 = getitem_1422 = getitem_1423 = getitem_1424 = getitem_1425 = getitem_1426 = getitem_1427 = getitem_1428 = getitem_1429 = getitem_1430 = getitem_1431 = getitem_1432 = getitem_1433 = getitem_1434 = getitem_1435 = getitem_1436 = getitem_1437 = getitem_1438 = getitem_1439 = getitem_1440 = getitem_1441 = getitem_1442 = getitem_1443 = getitem_1444 = getitem_1445 = getitem_1446 = getitem_1447 = getitem_1448 = getitem_1449 = getitem_1450 = getitem_1451 = getitem_1452 = getitem_1453 = getitem_1454 = getitem_1455 = getitem_1456 = getitem_1457 = getitem_1458 = getitem_1459 = getitem_1460 = getitem_1461 = getitem_1462 = getitem_1463 = getitem_1464 = getitem_1465 = getitem_1466 = getitem_1467 = getitem_1468 = getitem_1469 = getitem_1470 = getitem_1471 = getitem_1472 = getitem_1473 = getitem_1474 = getitem_1475 = getitem_1476 = getitem_1477 = getitem_1478 = getitem_1479 = None\n getitem_1776 = _foreach_div[0]\n getitem_1777 = _foreach_div[1]\n getitem_1778 = _foreach_div[2]\n getitem_1779 = _foreach_div[3]\n getitem_1780 = _foreach_div[4]\n getitem_1781 = _foreach_div[5]\n getitem_1782 = _foreach_div[6]\n getitem_1783 = _foreach_div[7]\n getitem_1784 = _foreach_div[8]\n getitem_1785 = _foreach_div[9]\n getitem_1786 = _foreach_div[10]\n getitem_1787 = _foreach_div[11]\n getitem_1788 = _foreach_div[12]\n getitem_1789 = _foreach_div[13]\n getitem_1790 = _foreach_div[14]\n getitem_1791 = _foreach_div[15]\n getitem_1792 = _foreach_div[16]\n getitem_1793 = _foreach_div[17]\n getitem_1794 = _foreach_div[18]\n getitem_1795 = _foreach_div[19]\n getitem_1796 = _foreach_div[20]\n getitem_1797 = _foreach_div[21]\n getitem_1798 = _foreach_div[22]\n getitem_1799 = _foreach_div[23]\n getitem_1800 = _foreach_div[24]\n getitem_1801 = _foreach_div[25]\n getitem_1802 = _foreach_div[26]\n getitem_1803 = _foreach_div[27]\n getitem_1804 = _foreach_div[28]\n getitem_1805 = _foreach_div[29]\n getitem_1806 = _foreach_div[30]\n getitem_1807 = _foreach_div[31]\n getitem_1808 = _foreach_div[32]\n getitem_1809 = _foreach_div[33]\n getitem_1810 = _foreach_div[34]\n getitem_1811 = _foreach_div[35]\n getitem_1812 = _foreach_div[36]\n getitem_1813 = _foreach_div[37]\n getitem_1814 = _foreach_div[38]\n getitem_1815 = _foreach_div[39]\n getitem_1816 = _foreach_div[40]\n getitem_1817 = _foreach_div[41]\n getitem_1818 = _foreach_div[42]\n 
getitem_1819 = _foreach_div[43]\n getitem_1820 = _foreach_div[44]\n getitem_1821 = _foreach_div[45]\n getitem_1822 = _foreach_div[46]\n getitem_1823 = _foreach_div[47]\n getitem_1824 = _foreach_div[48]\n getitem_1825 = _foreach_div[49]\n getitem_1826 = _foreach_div[50]\n getitem_1827 = _foreach_div[51]\n getitem_1828 = _foreach_div[52]\n getitem_1829 = _foreach_div[53]\n getitem_1830 = _foreach_div[54]\n getitem_1831 = _foreach_div[55]\n getitem_1832 = _foreach_div[56]\n getitem_1833 = _foreach_div[57]\n getitem_1834 = _foreach_div[58]\n getitem_1835 = _foreach_div[59]\n getitem_1836 = _foreach_div[60]\n getitem_1837 = _foreach_div[61]\n getitem_1838 = _foreach_div[62]\n getitem_1839 = _foreach_div[63]\n getitem_1840 = _foreach_div[64]\n getitem_1841 = _foreach_div[65]\n getitem_1842 = _foreach_div[66]\n getitem_1843 = _foreach_div[67]\n getitem_1844 = _foreach_div[68]\n getitem_1845 = _foreach_div[69]\n getitem_1846 = _foreach_div[70]\n getitem_1847 = _foreach_div[71]\n getitem_1848 = _foreach_div[72]\n getitem_1849 = _foreach_div[73]\n getitem_1850 = _foreach_div[74]\n getitem_1851 = _foreach_div[75]\n getitem_1852 = _foreach_div[76]\n getitem_1853 = _foreach_div[77]\n getitem_1854 = _foreach_div[78]\n getitem_1855 = _foreach_div[79]\n getitem_1856 = _foreach_div[80]\n getitem_1857 = _foreach_div[81]\n getitem_1858 = _foreach_div[82]\n getitem_1859 = _foreach_div[83]\n getitem_1860 = _foreach_div[84]\n getitem_1861 = _foreach_div[85]\n getitem_1862 = _foreach_div[86]\n getitem_1863 = _foreach_div[87]\n getitem_1864 = _foreach_div[88]\n getitem_1865 = _foreach_div[89]\n getitem_1866 = _foreach_div[90]\n getitem_1867 = _foreach_div[91]\n getitem_1868 = _foreach_div[92]\n getitem_1869 = _foreach_div[93]\n getitem_1870 = _foreach_div[94]\n getitem_1871 = _foreach_div[95]\n getitem_1872 = _foreach_div[96]\n getitem_1873 = _foreach_div[97]\n getitem_1874 = _foreach_div[98]\n getitem_1875 = _foreach_div[99]\n getitem_1876 = _foreach_div[100]\n getitem_1877 = _foreach_div[101]\n getitem_1878 = _foreach_div[102]\n getitem_1879 = _foreach_div[103]\n getitem_1880 = _foreach_div[104]\n getitem_1881 = _foreach_div[105]\n getitem_1882 = _foreach_div[106]\n getitem_1883 = _foreach_div[107]\n getitem_1884 = _foreach_div[108]\n getitem_1885 = _foreach_div[109]\n getitem_1886 = _foreach_div[110]\n getitem_1887 = _foreach_div[111]\n getitem_1888 = _foreach_div[112]\n getitem_1889 = _foreach_div[113]\n getitem_1890 = _foreach_div[114]\n getitem_1891 = _foreach_div[115]\n getitem_1892 = _foreach_div[116]\n getitem_1893 = _foreach_div[117]\n getitem_1894 = _foreach_div[118]\n getitem_1895 = _foreach_div[119]\n getitem_1896 = _foreach_div[120]\n getitem_1897 = _foreach_div[121]\n getitem_1898 = _foreach_div[122]\n getitem_1899 = _foreach_div[123]\n getitem_1900 = _foreach_div[124]\n getitem_1901 = _foreach_div[125]\n getitem_1902 = _foreach_div[126]\n getitem_1903 = _foreach_div[127]\n getitem_1904 = _foreach_div[128]\n getitem_1905 = _foreach_div[129]\n getitem_1906 = _foreach_div[130]\n getitem_1907 = _foreach_div[131]\n getitem_1908 = _foreach_div[132]\n getitem_1909 = _foreach_div[133]\n getitem_1910 = _foreach_div[134]\n getitem_1911 = _foreach_div[135]\n getitem_1912 = _foreach_div[136]\n getitem_1913 = _foreach_div[137]\n getitem_1914 = _foreach_div[138]\n getitem_1915 = _foreach_div[139]\n getitem_1916 = _foreach_div[140]\n getitem_1917 = _foreach_div[141]\n getitem_1918 = _foreach_div[142]\n getitem_1919 = _foreach_div[143]\n getitem_1920 = _foreach_div[144]\n getitem_1921 = _foreach_div[145]\n 
getitem_1922 = _foreach_div[146]\n getitem_1923 = _foreach_div[147]; _foreach_div = None\n _foreach_reciprocal = torch.ops.aten._foreach_reciprocal.default([getitem_1776, getitem_1777, getitem_1778, getitem_1779, getitem_1780, getitem_1781, getitem_1782, getitem_1783, getitem_1784, getitem_1785, getitem_1786, getitem_1787, getitem_1788, getitem_1789, getitem_1790, getitem_1791, getitem_1792, getitem_1793, getitem_1794, getitem_1795, getitem_1796, getitem_1797, getitem_1798, getitem_1799, getitem_1800, getitem_1801, getitem_1802, getitem_1803, getitem_1804, getitem_1805, getitem_1806, getitem_1807, getitem_1808, getitem_1809, getitem_1810, getitem_1811, getitem_1812, getitem_1813, getitem_1814, getitem_1815, getitem_1816, getitem_1817, getitem_1818, getitem_1819, getitem_1820, getitem_1821, getitem_1822, getitem_1823, getitem_1824, getitem_1825, getitem_1826, getitem_1827, getitem_1828, getitem_1829, getitem_1830, getitem_1831, getitem_1832, getitem_1833, getitem_1834, getitem_1835, getitem_1836, getitem_1837, getitem_1838, getitem_1839, getitem_1840, getitem_1841, getitem_1842, getitem_1843, getitem_1844, getitem_1845, getitem_1846, getitem_1847, getitem_1848, getitem_1849, getitem_1850, getitem_1851, getitem_1852, getitem_1853, getitem_1854, getitem_1855, getitem_1856, getitem_1857, getitem_1858, getitem_1859, getitem_1860, getitem_1861, getitem_1862, getitem_1863, getitem_1864, getitem_1865, getitem_1866, getitem_1867, getitem_1868, getitem_1869, getitem_1870, getitem_1871, getitem_1872, getitem_1873, getitem_1874, getitem_1875, getitem_1876, getitem_1877, getitem_1878, getitem_1879, getitem_1880, getitem_1881, getitem_1882, getitem_1883, getitem_1884, getitem_1885, getitem_1886, getitem_1887, getitem_1888, getitem_1889, getitem_1890, getitem_1891, getitem_1892, getitem_1893, getitem_1894, getitem_1895, getitem_1896, getitem_1897, getitem_1898, getitem_1899, getitem_1900, getitem_1901, getitem_1902, getitem_1903, getitem_1904, getitem_1905, getitem_1906, getitem_1907, getitem_1908, getitem_1909, getitem_1910, getitem_1911, getitem_1912, getitem_1913, getitem_1914, getitem_1915, getitem_1916, getitem_1917, getitem_1918, getitem_1919, getitem_1920, getitem_1921, getitem_1922, getitem_1923]); getitem_1776 = getitem_1777 = getitem_1778 = getitem_1779 = getitem_1780 = getitem_1781 = getitem_1782 = getitem_1783 = getitem_1784 = getitem_1785 = getitem_1786 = getitem_1787 = getitem_1788 = getitem_1789 = getitem_1790 = getitem_1791 = getitem_1792 = getitem_1793 = getitem_1794 = getitem_1795 = getitem_1796 = getitem_1797 = getitem_1798 = getitem_1799 = getitem_1800 = getitem_1801 = getitem_1802 = getitem_1803 = getitem_1804 = getitem_1805 = getitem_1806 = getitem_1807 = getitem_1808 = getitem_1809 = getitem_1810 = getitem_1811 = getitem_1812 = getitem_1813 = getitem_1814 = getitem_1815 = getitem_1816 = getitem_1817 = getitem_1818 = getitem_1819 = getitem_1820 = getitem_1821 = getitem_1822 = getitem_1823 = getitem_1824 = getitem_1825 = getitem_1826 = getitem_1827 = getitem_1828 = getitem_1829 = getitem_1830 = getitem_1831 = getitem_1832 = getitem_1833 = getitem_1834 = getitem_1835 = getitem_1836 = getitem_1837 = getitem_1838 = getitem_1839 = getitem_1840 = getitem_1841 = getitem_1842 = getitem_1843 = getitem_1844 = getitem_1845 = getitem_1846 = getitem_1847 = getitem_1848 = getitem_1849 = getitem_1850 = getitem_1851 = getitem_1852 = getitem_1853 = getitem_1854 = getitem_1855 = getitem_1856 = getitem_1857 = getitem_1858 = getitem_1859 = getitem_1860 = getitem_1861 = getitem_1862 = getitem_1863 = 
getitem_1864 = getitem_1865 = getitem_1866 = getitem_1867 = getitem_1868 = getitem_1869 = getitem_1870 = getitem_1871 = getitem_1872 = getitem_1873 = getitem_1874 = getitem_1875 = getitem_1876 = getitem_1877 = getitem_1878 = getitem_1879 = getitem_1880 = getitem_1881 = getitem_1882 = getitem_1883 = getitem_1884 = getitem_1885 = getitem_1886 = getitem_1887 = getitem_1888 = getitem_1889 = getitem_1890 = getitem_1891 = getitem_1892 = getitem_1893 = getitem_1894 = getitem_1895 = getitem_1896 = getitem_1897 = getitem_1898 = getitem_1899 = getitem_1900 = getitem_1901 = getitem_1902 = getitem_1903 = getitem_1904 = getitem_1905 = getitem_1906 = getitem_1907 = getitem_1908 = getitem_1909 = getitem_1910 = getitem_1911 = getitem_1912 = getitem_1913 = getitem_1914 = getitem_1915 = getitem_1916 = getitem_1917 = getitem_1918 = getitem_1919 = getitem_1920 = getitem_1921 = getitem_1922 = getitem_1923 = None\n getitem_1924 = _foreach_reciprocal[0]\n getitem_1925 = _foreach_reciprocal[1]\n getitem_1926 = _foreach_reciprocal[2]\n getitem_1927 = _foreach_reciprocal[3]\n getitem_1928 = _foreach_reciprocal[4]\n getitem_1929 = _foreach_reciprocal[5]\n getitem_1930 = _foreach_reciprocal[6]\n getitem_1931 = _foreach_reciprocal[7]\n getitem_1932 = _foreach_reciprocal[8]\n getitem_1933 = _foreach_reciprocal[9]\n getitem_1934 = _foreach_reciprocal[10]\n getitem_1935 = _foreach_reciprocal[11]\n getitem_1936 = _foreach_reciprocal[12]\n getitem_1937 = _foreach_reciprocal[13]\n getitem_1938 = _foreach_reciprocal[14]\n getitem_1939 = _foreach_reciprocal[15]\n getitem_1940 = _foreach_reciprocal[16]\n getitem_1941 = _foreach_reciprocal[17]\n getitem_1942 = _foreach_reciprocal[18]\n getitem_1943 = _foreach_reciprocal[19]\n getitem_1944 = _foreach_reciprocal[20]\n getitem_1945 = _foreach_reciprocal[21]\n getitem_1946 = _foreach_reciprocal[22]\n getitem_1947 = _foreach_reciprocal[23]\n getitem_1948 = _foreach_reciprocal[24]\n getitem_1949 = _foreach_reciprocal[25]\n getitem_1950 = _foreach_reciprocal[26]\n getitem_1951 = _foreach_reciprocal[27]\n getitem_1952 = _foreach_reciprocal[28]\n getitem_1953 = _foreach_reciprocal[29]\n getitem_1954 = _foreach_reciprocal[30]\n getitem_1955 = _foreach_reciprocal[31]\n getitem_1956 = _foreach_reciprocal[32]\n getitem_1957 = _foreach_reciprocal[33]\n getitem_1958 = _foreach_reciprocal[34]\n getitem_1959 = _foreach_reciprocal[35]\n getitem_1960 = _foreach_reciprocal[36]\n getitem_1961 = _foreach_reciprocal[37]\n getitem_1962 = _foreach_reciprocal[38]\n getitem_1963 = _foreach_reciprocal[39]\n getitem_1964 = _foreach_reciprocal[40]\n getitem_1965 = _foreach_reciprocal[41]\n getitem_1966 = _foreach_reciprocal[42]\n getitem_1967 = _foreach_reciprocal[43]\n getitem_1968 = _foreach_reciprocal[44]\n getitem_1969 = _foreach_reciprocal[45]\n getitem_1970 = _foreach_reciprocal[46]\n getitem_1971 = _foreach_reciprocal[47]\n getitem_1972 = _foreach_reciprocal[48]\n getitem_1973 = _foreach_reciprocal[49]\n getitem_1974 = _foreach_reciprocal[50]\n getitem_1975 = _foreach_reciprocal[51]\n getitem_1976 = _foreach_reciprocal[52]\n getitem_1977 = _foreach_reciprocal[53]\n getitem_1978 = _foreach_reciprocal[54]\n getitem_1979 = _foreach_reciprocal[55]\n getitem_1980 = _foreach_reciprocal[56]\n getitem_1981 = _foreach_reciprocal[57]\n getitem_1982 = _foreach_reciprocal[58]\n getitem_1983 = _foreach_reciprocal[59]\n getitem_1984 = _foreach_reciprocal[60]\n getitem_1985 = _foreach_reciprocal[61]\n getitem_1986 = _foreach_reciprocal[62]\n getitem_1987 = _foreach_reciprocal[63]\n getitem_1988 = 
_foreach_reciprocal[64]\n getitem_1989 = _foreach_reciprocal[65]\n getitem_1990 = _foreach_reciprocal[66]\n getitem_1991 = _foreach_reciprocal[67]\n getitem_1992 = _foreach_reciprocal[68]\n getitem_1993 = _foreach_reciprocal[69]\n getitem_1994 = _foreach_reciprocal[70]\n getitem_1995 = _foreach_reciprocal[71]\n getitem_1996 = _foreach_reciprocal[72]\n getitem_1997 = _foreach_reciprocal[73]\n getitem_1998 = _foreach_reciprocal[74]\n getitem_1999 = _foreach_reciprocal[75]\n getitem_2000 = _foreach_reciprocal[76]\n getitem_2001 = _foreach_reciprocal[77]\n getitem_2002 = _foreach_reciprocal[78]\n getitem_2003 = _foreach_reciprocal[79]\n getitem_2004 = _foreach_reciprocal[80]\n getitem_2005 = _foreach_reciprocal[81]\n getitem_2006 = _foreach_reciprocal[82]\n getitem_2007 = _foreach_reciprocal[83]\n getitem_2008 = _foreach_reciprocal[84]\n getitem_2009 = _foreach_reciprocal[85]\n getitem_2010 = _foreach_reciprocal[86]\n getitem_2011 = _foreach_reciprocal[87]\n getitem_2012 = _foreach_reciprocal[88]\n getitem_2013 = _foreach_reciprocal[89]\n getitem_2014 = _foreach_reciprocal[90]\n getitem_2015 = _foreach_reciprocal[91]\n getitem_2016 = _foreach_reciprocal[92]\n getitem_2017 = _foreach_reciprocal[93]\n getitem_2018 = _foreach_reciprocal[94]\n getitem_2019 = _foreach_reciprocal[95]\n getitem_2020 = _foreach_reciprocal[96]\n getitem_2021 = _foreach_reciprocal[97]\n getitem_2022 = _foreach_reciprocal[98]\n getitem_2023 = _foreach_reciprocal[99]\n getitem_2024 = _foreach_reciprocal[100]\n getitem_2025 = _foreach_reciprocal[101]\n getitem_2026 = _foreach_reciprocal[102]\n getitem_2027 = _foreach_reciprocal[103]\n getitem_2028 = _foreach_reciprocal[104]\n getitem_2029 = _foreach_reciprocal[105]\n getitem_2030 = _foreach_reciprocal[106]\n getitem_2031 = _foreach_reciprocal[107]\n getitem_2032 = _foreach_reciprocal[108]\n getitem_2033 = _foreach_reciprocal[109]\n getitem_2034 = _foreach_reciprocal[110]\n getitem_2035 = _foreach_reciprocal[111]\n getitem_2036 = _foreach_reciprocal[112]\n getitem_2037 = _foreach_reciprocal[113]\n getitem_2038 = _foreach_reciprocal[114]\n getitem_2039 = _foreach_reciprocal[115]\n getitem_2040 = _foreach_reciprocal[116]\n getitem_2041 = _foreach_reciprocal[117]\n getitem_2042 = _foreach_reciprocal[118]\n getitem_2043 = _foreach_reciprocal[119]\n getitem_2044 = _foreach_reciprocal[120]\n getitem_2045 = _foreach_reciprocal[121]\n getitem_2046 = _foreach_reciprocal[122]\n getitem_2047 = _foreach_reciprocal[123]\n getitem_2048 = _foreach_reciprocal[124]\n getitem_2049 = _foreach_reciprocal[125]\n getitem_2050 = _foreach_reciprocal[126]\n getitem_2051 = _foreach_reciprocal[127]\n getitem_2052 = _foreach_reciprocal[128]\n getitem_2053 = _foreach_reciprocal[129]\n getitem_2054 = _foreach_reciprocal[130]\n getitem_2055 = _foreach_reciprocal[131]\n getitem_2056 = _foreach_reciprocal[132]\n getitem_2057 = _foreach_reciprocal[133]\n getitem_2058 = _foreach_reciprocal[134]\n getitem_2059 = _foreach_reciprocal[135]\n getitem_2060 = _foreach_reciprocal[136]\n getitem_2061 = _foreach_reciprocal[137]\n getitem_2062 = _foreach_reciprocal[138]\n getitem_2063 = _foreach_reciprocal[139]\n getitem_2064 = _foreach_reciprocal[140]\n getitem_2065 = _foreach_reciprocal[141]\n getitem_2066 = _foreach_reciprocal[142]\n getitem_2067 = _foreach_reciprocal[143]\n getitem_2068 = _foreach_reciprocal[144]\n getitem_2069 = _foreach_reciprocal[145]\n getitem_2070 = _foreach_reciprocal[146]\n getitem_2071 = _foreach_reciprocal[147]; _foreach_reciprocal = None\n _foreach_sqrt = 
torch.ops.aten._foreach_sqrt.default([getitem_1628, getitem_1629, getitem_1630, getitem_1631, getitem_1632, getitem_1633, getitem_1634, getitem_1635, getitem_1636, getitem_1637, getitem_1638, getitem_1639, getitem_1640, getitem_1641, getitem_1642, getitem_1643, getitem_1644, getitem_1645, getitem_1646, getitem_1647, getitem_1648, getitem_1649, getitem_1650, getitem_1651, getitem_1652, getitem_1653, getitem_1654, getitem_1655, getitem_1656, getitem_1657, getitem_1658, getitem_1659, getitem_1660, getitem_1661, getitem_1662, getitem_1663, getitem_1664, getitem_1665, getitem_1666, getitem_1667, getitem_1668, getitem_1669, getitem_1670, getitem_1671, getitem_1672, getitem_1673, getitem_1674, getitem_1675, getitem_1676, getitem_1677, getitem_1678, getitem_1679, getitem_1680, getitem_1681, getitem_1682, getitem_1683, getitem_1684, getitem_1685, getitem_1686, getitem_1687, getitem_1688, getitem_1689, getitem_1690, getitem_1691, getitem_1692, getitem_1693, getitem_1694, getitem_1695, getitem_1696, getitem_1697, getitem_1698, getitem_1699, getitem_1700, getitem_1701, getitem_1702, getitem_1703, getitem_1704, getitem_1705, getitem_1706, getitem_1707, getitem_1708, getitem_1709, getitem_1710, getitem_1711, getitem_1712, getitem_1713, getitem_1714, getitem_1715, getitem_1716, getitem_1717, getitem_1718, getitem_1719, getitem_1720, getitem_1721, getitem_1722, getitem_1723, getitem_1724, getitem_1725, getitem_1726, getitem_1727, getitem_1728, getitem_1729, getitem_1730, getitem_1731, getitem_1732, getitem_1733, getitem_1734, getitem_1735, getitem_1736, getitem_1737, getitem_1738, getitem_1739, getitem_1740, getitem_1741, getitem_1742, getitem_1743, getitem_1744, getitem_1745, getitem_1746, getitem_1747, getitem_1748, getitem_1749, getitem_1750, getitem_1751, getitem_1752, getitem_1753, getitem_1754, getitem_1755, getitem_1756, getitem_1757, getitem_1758, getitem_1759, getitem_1760, getitem_1761, getitem_1762, getitem_1763, getitem_1764, getitem_1765, getitem_1766, getitem_1767, getitem_1768, getitem_1769, getitem_1770, getitem_1771, getitem_1772, getitem_1773, getitem_1774, getitem_1775]); getitem_1628 = getitem_1629 = getitem_1630 = getitem_1631 = getitem_1632 = getitem_1633 = getitem_1634 = getitem_1635 = getitem_1636 = getitem_1637 = getitem_1638 = getitem_1639 = getitem_1640 = getitem_1641 = getitem_1642 = getitem_1643 = getitem_1644 = getitem_1645 = getitem_1646 = getitem_1647 = getitem_1648 = getitem_1649 = getitem_1650 = getitem_1651 = getitem_1652 = getitem_1653 = getitem_1654 = getitem_1655 = getitem_1656 = getitem_1657 = getitem_1658 = getitem_1659 = getitem_1660 = getitem_1661 = getitem_1662 = getitem_1663 = getitem_1664 = getitem_1665 = getitem_1666 = getitem_1667 = getitem_1668 = getitem_1669 = getitem_1670 = getitem_1671 = getitem_1672 = getitem_1673 = getitem_1674 = getitem_1675 = getitem_1676 = getitem_1677 = getitem_1678 = getitem_1679 = getitem_1680 = getitem_1681 = getitem_1682 = getitem_1683 = getitem_1684 = getitem_1685 = getitem_1686 = getitem_1687 = getitem_1688 = getitem_1689 = getitem_1690 = getitem_1691 = getitem_1692 = getitem_1693 = getitem_1694 = getitem_1695 = getitem_1696 = getitem_1697 = getitem_1698 = getitem_1699 = getitem_1700 = getitem_1701 = getitem_1702 = getitem_1703 = getitem_1704 = getitem_1705 = getitem_1706 = getitem_1707 = getitem_1708 = getitem_1709 = getitem_1710 = getitem_1711 = getitem_1712 = getitem_1713 = getitem_1714 = getitem_1715 = getitem_1716 = getitem_1717 = getitem_1718 = getitem_1719 = getitem_1720 = getitem_1721 = getitem_1722 = getitem_1723 = 
getitem_1724 = getitem_1725 = getitem_1726 = getitem_1727 = getitem_1728 = getitem_1729 = getitem_1730 = getitem_1731 = getitem_1732 = getitem_1733 = getitem_1734 = getitem_1735 = getitem_1736 = getitem_1737 = getitem_1738 = getitem_1739 = getitem_1740 = getitem_1741 = getitem_1742 = getitem_1743 = getitem_1744 = getitem_1745 = getitem_1746 = getitem_1747 = getitem_1748 = getitem_1749 = getitem_1750 = getitem_1751 = getitem_1752 = getitem_1753 = getitem_1754 = getitem_1755 = getitem_1756 = getitem_1757 = getitem_1758 = getitem_1759 = getitem_1760 = getitem_1761 = getitem_1762 = getitem_1763 = getitem_1764 = getitem_1765 = getitem_1766 = getitem_1767 = getitem_1768 = getitem_1769 = getitem_1770 = getitem_1771 = getitem_1772 = getitem_1773 = getitem_1774 = getitem_1775 = None\n getitem_2072 = _foreach_sqrt[0]\n getitem_2073 = _foreach_sqrt[1]\n getitem_2074 = _foreach_sqrt[2]\n getitem_2075 = _foreach_sqrt[3]\n getitem_2076 = _foreach_sqrt[4]\n getitem_2077 = _foreach_sqrt[5]\n getitem_2078 = _foreach_sqrt[6]\n getitem_2079 = _foreach_sqrt[7]\n getitem_2080 = _foreach_sqrt[8]\n getitem_2081 = _foreach_sqrt[9]\n getitem_2082 = _foreach_sqrt[10]\n getitem_2083 = _foreach_sqrt[11]\n getitem_2084 = _foreach_sqrt[12]\n getitem_2085 = _foreach_sqrt[13]\n getitem_2086 = _foreach_sqrt[14]\n getitem_2087 = _foreach_sqrt[15]\n getitem_2088 = _foreach_sqrt[16]\n getitem_2089 = _foreach_sqrt[17]\n getitem_2090 = _foreach_sqrt[18]\n getitem_2091 = _foreach_sqrt[19]\n getitem_2092 = _foreach_sqrt[20]\n getitem_2093 = _foreach_sqrt[21]\n getitem_2094 = _foreach_sqrt[22]\n getitem_2095 = _foreach_sqrt[23]\n getitem_2096 = _foreach_sqrt[24]\n getitem_2097 = _foreach_sqrt[25]\n getitem_2098 = _foreach_sqrt[26]\n getitem_2099 = _foreach_sqrt[27]\n getitem_2100 = _foreach_sqrt[28]\n getitem_2101 = _foreach_sqrt[29]\n getitem_2102 = _foreach_sqrt[30]\n getitem_2103 = _foreach_sqrt[31]\n getitem_2104 = _foreach_sqrt[32]\n getitem_2105 = _foreach_sqrt[33]\n getitem_2106 = _foreach_sqrt[34]\n getitem_2107 = _foreach_sqrt[35]\n getitem_2108 = _foreach_sqrt[36]\n getitem_2109 = _foreach_sqrt[37]\n getitem_2110 = _foreach_sqrt[38]\n getitem_2111 = _foreach_sqrt[39]\n getitem_2112 = _foreach_sqrt[40]\n getitem_2113 = _foreach_sqrt[41]\n getitem_2114 = _foreach_sqrt[42]\n getitem_2115 = _foreach_sqrt[43]\n getitem_2116 = _foreach_sqrt[44]\n getitem_2117 = _foreach_sqrt[45]\n getitem_2118 = _foreach_sqrt[46]\n getitem_2119 = _foreach_sqrt[47]\n getitem_2120 = _foreach_sqrt[48]\n getitem_2121 = _foreach_sqrt[49]\n getitem_2122 = _foreach_sqrt[50]\n getitem_2123 = _foreach_sqrt[51]\n getitem_2124 = _foreach_sqrt[52]\n getitem_2125 = _foreach_sqrt[53]\n getitem_2126 = _foreach_sqrt[54]\n getitem_2127 = _foreach_sqrt[55]\n getitem_2128 = _foreach_sqrt[56]\n getitem_2129 = _foreach_sqrt[57]\n getitem_2130 = _foreach_sqrt[58]\n getitem_2131 = _foreach_sqrt[59]\n getitem_2132 = _foreach_sqrt[60]\n getitem_2133 = _foreach_sqrt[61]\n getitem_2134 = _foreach_sqrt[62]\n getitem_2135 = _foreach_sqrt[63]\n getitem_2136 = _foreach_sqrt[64]\n getitem_2137 = _foreach_sqrt[65]\n getitem_2138 = _foreach_sqrt[66]\n getitem_2139 = _foreach_sqrt[67]\n getitem_2140 = _foreach_sqrt[68]\n getitem_2141 = _foreach_sqrt[69]\n getitem_2142 = _foreach_sqrt[70]\n getitem_2143 = _foreach_sqrt[71]\n getitem_2144 = _foreach_sqrt[72]\n getitem_2145 = _foreach_sqrt[73]\n getitem_2146 = _foreach_sqrt[74]\n getitem_2147 = _foreach_sqrt[75]\n getitem_2148 = _foreach_sqrt[76]\n getitem_2149 = _foreach_sqrt[77]\n getitem_2150 = _foreach_sqrt[78]\n getitem_2151 
= _foreach_sqrt[79]\n getitem_2152 = _foreach_sqrt[80]\n getitem_2153 = _foreach_sqrt[81]\n getitem_2154 = _foreach_sqrt[82]\n getitem_2155 = _foreach_sqrt[83]\n getitem_2156 = _foreach_sqrt[84]\n getitem_2157 = _foreach_sqrt[85]\n getitem_2158 = _foreach_sqrt[86]\n getitem_2159 = _foreach_sqrt[87]\n getitem_2160 = _foreach_sqrt[88]\n getitem_2161 = _foreach_sqrt[89]\n getitem_2162 = _foreach_sqrt[90]\n getitem_2163 = _foreach_sqrt[91]\n getitem_2164 = _foreach_sqrt[92]\n getitem_2165 = _foreach_sqrt[93]\n getitem_2166 = _foreach_sqrt[94]\n getitem_2167 = _foreach_sqrt[95]\n getitem_2168 = _foreach_sqrt[96]\n getitem_2169 = _foreach_sqrt[97]\n getitem_2170 = _foreach_sqrt[98]\n getitem_2171 = _foreach_sqrt[99]\n getitem_2172 = _foreach_sqrt[100]\n getitem_2173 = _foreach_sqrt[101]\n getitem_2174 = _foreach_sqrt[102]\n getitem_2175 = _foreach_sqrt[103]\n getitem_2176 = _foreach_sqrt[104]\n getitem_2177 = _foreach_sqrt[105]\n getitem_2178 = _foreach_sqrt[106]\n getitem_2179 = _foreach_sqrt[107]\n getitem_2180 = _foreach_sqrt[108]\n getitem_2181 = _foreach_sqrt[109]\n getitem_2182 = _foreach_sqrt[110]\n getitem_2183 = _foreach_sqrt[111]\n getitem_2184 = _foreach_sqrt[112]\n getitem_2185 = _foreach_sqrt[113]\n getitem_2186 = _foreach_sqrt[114]\n getitem_2187 = _foreach_sqrt[115]\n getitem_2188 = _foreach_sqrt[116]\n getitem_2189 = _foreach_sqrt[117]\n getitem_2190 = _foreach_sqrt[118]\n getitem_2191 = _foreach_sqrt[119]\n getitem_2192 = _foreach_sqrt[120]\n getitem_2193 = _foreach_sqrt[121]\n getitem_2194 = _foreach_sqrt[122]\n getitem_2195 = _foreach_sqrt[123]\n getitem_2196 = _foreach_sqrt[124]\n getitem_2197 = _foreach_sqrt[125]\n getitem_2198 = _foreach_sqrt[126]\n getitem_2199 = _foreach_sqrt[127]\n getitem_2200 = _foreach_sqrt[128]\n getitem_2201 = _foreach_sqrt[129]\n getitem_2202 = _foreach_sqrt[130]\n getitem_2203 = _foreach_sqrt[131]\n getitem_2204 = _foreach_sqrt[132]\n getitem_2205 = _foreach_sqrt[133]\n getitem_2206 = _foreach_sqrt[134]\n getitem_2207 = _foreach_sqrt[135]\n getitem_2208 = _foreach_sqrt[136]\n getitem_2209 = _foreach_sqrt[137]\n getitem_2210 = _foreach_sqrt[138]\n getitem_2211 = _foreach_sqrt[139]\n getitem_2212 = _foreach_sqrt[140]\n getitem_2213 = _foreach_sqrt[141]\n getitem_2214 = _foreach_sqrt[142]\n getitem_2215 = _foreach_sqrt[143]\n getitem_2216 = _foreach_sqrt[144]\n getitem_2217 = _foreach_sqrt[145]\n getitem_2218 = _foreach_sqrt[146]\n getitem_2219 = _foreach_sqrt[147]; _foreach_sqrt = None\n _foreach_sqrt_1 = torch.ops.aten._foreach_sqrt.default([getitem_888, getitem_889, getitem_890, getitem_891, getitem_892, getitem_893, getitem_894, getitem_895, getitem_896, getitem_897, getitem_898, getitem_899, getitem_900, getitem_901, getitem_902, getitem_903, getitem_904, getitem_905, getitem_906, getitem_907, getitem_908, getitem_909, getitem_910, getitem_911, getitem_912, getitem_913, getitem_914, getitem_915, getitem_916, getitem_917, getitem_918, getitem_919, getitem_920, getitem_921, getitem_922, getitem_923, getitem_924, getitem_925, getitem_926, getitem_927, getitem_928, getitem_929, getitem_930, getitem_931, getitem_932, getitem_933, getitem_934, getitem_935, getitem_936, getitem_937, getitem_938, getitem_939, getitem_940, getitem_941, getitem_942, getitem_943, getitem_944, getitem_945, getitem_946, getitem_947, getitem_948, getitem_949, getitem_950, getitem_951, getitem_952, getitem_953, getitem_954, getitem_955, getitem_956, getitem_957, getitem_958, getitem_959, getitem_960, getitem_961, getitem_962, getitem_963, getitem_964, getitem_965, getitem_966, 
getitem_967, getitem_968, getitem_969, getitem_970, getitem_971, getitem_972, getitem_973, getitem_974, getitem_975, getitem_976, getitem_977, getitem_978, getitem_979, getitem_980, getitem_981, getitem_982, getitem_983, getitem_984, getitem_985, getitem_986, getitem_987, getitem_988, getitem_989, getitem_990, getitem_991, getitem_992, getitem_993, getitem_994, getitem_995, getitem_996, getitem_997, getitem_998, getitem_999, getitem_1000, getitem_1001, getitem_1002, getitem_1003, getitem_1004, getitem_1005, getitem_1006, getitem_1007, getitem_1008, getitem_1009, getitem_1010, getitem_1011, getitem_1012, getitem_1013, getitem_1014, getitem_1015, getitem_1016, getitem_1017, getitem_1018, getitem_1019, getitem_1020, getitem_1021, getitem_1022, getitem_1023, getitem_1024, getitem_1025, getitem_1026, getitem_1027, getitem_1028, getitem_1029, getitem_1030, getitem_1031, getitem_1032, getitem_1033, getitem_1034, getitem_1035])\n getitem_2220 = _foreach_sqrt_1[0]\n getitem_2221 = _foreach_sqrt_1[1]\n getitem_2222 = _foreach_sqrt_1[2]\n getitem_2223 = _foreach_sqrt_1[3]\n getitem_2224 = _foreach_sqrt_1[4]\n getitem_2225 = _foreach_sqrt_1[5]\n getitem_2226 = _foreach_sqrt_1[6]\n getitem_2227 = _foreach_sqrt_1[7]\n getitem_2228 = _foreach_sqrt_1[8]\n getitem_2229 = _foreach_sqrt_1[9]\n getitem_2230 = _foreach_sqrt_1[10]\n getitem_2231 = _foreach_sqrt_1[11]\n getitem_2232 = _foreach_sqrt_1[12]\n getitem_2233 = _foreach_sqrt_1[13]\n getitem_2234 = _foreach_sqrt_1[14]\n getitem_2235 = _foreach_sqrt_1[15]\n getitem_2236 = _foreach_sqrt_1[16]\n getitem_2237 = _foreach_sqrt_1[17]\n getitem_2238 = _foreach_sqrt_1[18]\n getitem_2239 = _foreach_sqrt_1[19]\n getitem_2240 = _foreach_sqrt_1[20]\n getitem_2241 = _foreach_sqrt_1[21]\n getitem_2242 = _foreach_sqrt_1[22]\n getitem_2243 = _foreach_sqrt_1[23]\n getitem_2244 = _foreach_sqrt_1[24]\n getitem_2245 = _foreach_sqrt_1[25]\n getitem_2246 = _foreach_sqrt_1[26]\n getitem_2247 = _foreach_sqrt_1[27]\n getitem_2248 = _foreach_sqrt_1[28]\n getitem_2249 = _foreach_sqrt_1[29]\n getitem_2250 = _foreach_sqrt_1[30]\n getitem_2251 = _foreach_sqrt_1[31]\n getitem_2252 = _foreach_sqrt_1[32]\n getitem_2253 = _foreach_sqrt_1[33]\n getitem_2254 = _foreach_sqrt_1[34]\n getitem_2255 = _foreach_sqrt_1[35]\n getitem_2256 = _foreach_sqrt_1[36]\n getitem_2257 = _foreach_sqrt_1[37]\n getitem_2258 = _foreach_sqrt_1[38]\n getitem_2259 = _foreach_sqrt_1[39]\n getitem_2260 = _foreach_sqrt_1[40]\n getitem_2261 = _foreach_sqrt_1[41]\n getitem_2262 = _foreach_sqrt_1[42]\n getitem_2263 = _foreach_sqrt_1[43]\n getitem_2264 = _foreach_sqrt_1[44]\n getitem_2265 = _foreach_sqrt_1[45]\n getitem_2266 = _foreach_sqrt_1[46]\n getitem_2267 = _foreach_sqrt_1[47]\n getitem_2268 = _foreach_sqrt_1[48]\n getitem_2269 = _foreach_sqrt_1[49]\n getitem_2270 = _foreach_sqrt_1[50]\n getitem_2271 = _foreach_sqrt_1[51]\n getitem_2272 = _foreach_sqrt_1[52]\n getitem_2273 = _foreach_sqrt_1[53]\n getitem_2274 = _foreach_sqrt_1[54]\n getitem_2275 = _foreach_sqrt_1[55]\n getitem_2276 = _foreach_sqrt_1[56]\n getitem_2277 = _foreach_sqrt_1[57]\n getitem_2278 = _foreach_sqrt_1[58]\n getitem_2279 = _foreach_sqrt_1[59]\n getitem_2280 = _foreach_sqrt_1[60]\n getitem_2281 = _foreach_sqrt_1[61]\n getitem_2282 = _foreach_sqrt_1[62]\n getitem_2283 = _foreach_sqrt_1[63]\n getitem_2284 = _foreach_sqrt_1[64]\n getitem_2285 = _foreach_sqrt_1[65]\n getitem_2286 = _foreach_sqrt_1[66]\n getitem_2287 = _foreach_sqrt_1[67]\n getitem_2288 = _foreach_sqrt_1[68]\n getitem_2289 = _foreach_sqrt_1[69]\n getitem_2290 = _foreach_sqrt_1[70]\n 
getitem_2291 = _foreach_sqrt_1[71]\n getitem_2292 = _foreach_sqrt_1[72]\n getitem_2293 = _foreach_sqrt_1[73]\n getitem_2294 = _foreach_sqrt_1[74]\n getitem_2295 = _foreach_sqrt_1[75]\n getitem_2296 = _foreach_sqrt_1[76]\n getitem_2297 = _foreach_sqrt_1[77]\n getitem_2298 = _foreach_sqrt_1[78]\n getitem_2299 = _foreach_sqrt_1[79]\n getitem_2300 = _foreach_sqrt_1[80]\n getitem_2301 = _foreach_sqrt_1[81]\n getitem_2302 = _foreach_sqrt_1[82]\n getitem_2303 = _foreach_sqrt_1[83]\n getitem_2304 = _foreach_sqrt_1[84]\n getitem_2305 = _foreach_sqrt_1[85]\n getitem_2306 = _foreach_sqrt_1[86]\n getitem_2307 = _foreach_sqrt_1[87]\n getitem_2308 = _foreach_sqrt_1[88]\n getitem_2309 = _foreach_sqrt_1[89]\n getitem_2310 = _foreach_sqrt_1[90]\n getitem_2311 = _foreach_sqrt_1[91]\n getitem_2312 = _foreach_sqrt_1[92]\n getitem_2313 = _foreach_sqrt_1[93]\n getitem_2314 = _foreach_sqrt_1[94]\n getitem_2315 = _foreach_sqrt_1[95]\n getitem_2316 = _foreach_sqrt_1[96]\n getitem_2317 = _foreach_sqrt_1[97]\n getitem_2318 = _foreach_sqrt_1[98]\n getitem_2319 = _foreach_sqrt_1[99]\n getitem_2320 = _foreach_sqrt_1[100]\n getitem_2321 = _foreach_sqrt_1[101]\n getitem_2322 = _foreach_sqrt_1[102]\n getitem_2323 = _foreach_sqrt_1[103]\n getitem_2324 = _foreach_sqrt_1[104]\n getitem_2325 = _foreach_sqrt_1[105]\n getitem_2326 = _foreach_sqrt_1[106]\n getitem_2327 = _foreach_sqrt_1[107]\n getitem_2328 = _foreach_sqrt_1[108]\n getitem_2329 = _foreach_sqrt_1[109]\n getitem_2330 = _foreach_sqrt_1[110]\n getitem_2331 = _foreach_sqrt_1[111]\n getitem_2332 = _foreach_sqrt_1[112]\n getitem_2333 = _foreach_sqrt_1[113]\n getitem_2334 = _foreach_sqrt_1[114]\n getitem_2335 = _foreach_sqrt_1[115]\n getitem_2336 = _foreach_sqrt_1[116]\n getitem_2337 = _foreach_sqrt_1[117]\n getitem_2338 = _foreach_sqrt_1[118]\n getitem_2339 = _foreach_sqrt_1[119]\n getitem_2340 = _foreach_sqrt_1[120]\n getitem_2341 = _foreach_sqrt_1[121]\n getitem_2342 = _foreach_sqrt_1[122]\n getitem_2343 = _foreach_sqrt_1[123]\n getitem_2344 = _foreach_sqrt_1[124]\n getitem_2345 = _foreach_sqrt_1[125]\n getitem_2346 = _foreach_sqrt_1[126]\n getitem_2347 = _foreach_sqrt_1[127]\n getitem_2348 = _foreach_sqrt_1[128]\n getitem_2349 = _foreach_sqrt_1[129]\n getitem_2350 = _foreach_sqrt_1[130]\n getitem_2351 = _foreach_sqrt_1[131]\n getitem_2352 = _foreach_sqrt_1[132]\n getitem_2353 = _foreach_sqrt_1[133]\n getitem_2354 = _foreach_sqrt_1[134]\n getitem_2355 = _foreach_sqrt_1[135]\n getitem_2356 = _foreach_sqrt_1[136]\n getitem_2357 = _foreach_sqrt_1[137]\n getitem_2358 = _foreach_sqrt_1[138]\n getitem_2359 = _foreach_sqrt_1[139]\n getitem_2360 = _foreach_sqrt_1[140]\n getitem_2361 = _foreach_sqrt_1[141]\n getitem_2362 = _foreach_sqrt_1[142]\n getitem_2363 = _foreach_sqrt_1[143]\n getitem_2364 = _foreach_sqrt_1[144]\n getitem_2365 = _foreach_sqrt_1[145]\n getitem_2366 = _foreach_sqrt_1[146]\n getitem_2367 = _foreach_sqrt_1[147]; _foreach_sqrt_1 = None\n _foreach_div_1 = torch.ops.aten._foreach_div.List([getitem_2220, getitem_2221, getitem_2222, getitem_2223, getitem_2224, getitem_2225, getitem_2226, getitem_2227, getitem_2228, getitem_2229, getitem_2230, getitem_2231, getitem_2232, getitem_2233, getitem_2234, getitem_2235, getitem_2236, getitem_2237, getitem_2238, getitem_2239, getitem_2240, getitem_2241, getitem_2242, getitem_2243, getitem_2244, getitem_2245, getitem_2246, getitem_2247, getitem_2248, getitem_2249, getitem_2250, getitem_2251, getitem_2252, getitem_2253, getitem_2254, getitem_2255, getitem_2256, getitem_2257, getitem_2258, getitem_2259, getitem_2260, 
getitem_2261, getitem_2262, getitem_2263, getitem_2264, getitem_2265, getitem_2266, getitem_2267, getitem_2268, getitem_2269, getitem_2270, getitem_2271, getitem_2272, getitem_2273, getitem_2274, getitem_2275, getitem_2276, getitem_2277, getitem_2278, getitem_2279, getitem_2280, getitem_2281, getitem_2282, getitem_2283, getitem_2284, getitem_2285, getitem_2286, getitem_2287, getitem_2288, getitem_2289, getitem_2290, getitem_2291, getitem_2292, getitem_2293, getitem_2294, getitem_2295, getitem_2296, getitem_2297, getitem_2298, getitem_2299, getitem_2300, getitem_2301, getitem_2302, getitem_2303, getitem_2304, getitem_2305, getitem_2306, getitem_2307, getitem_2308, getitem_2309, getitem_2310, getitem_2311, getitem_2312, getitem_2313, getitem_2314, getitem_2315, getitem_2316, getitem_2317, getitem_2318, getitem_2319, getitem_2320, getitem_2321, getitem_2322, getitem_2323, getitem_2324, getitem_2325, getitem_2326, getitem_2327, getitem_2328, getitem_2329, getitem_2330, getitem_2331, getitem_2332, getitem_2333, getitem_2334, getitem_2335, getitem_2336, getitem_2337, getitem_2338, getitem_2339, getitem_2340, getitem_2341, getitem_2342, getitem_2343, getitem_2344, getitem_2345, getitem_2346, getitem_2347, getitem_2348, getitem_2349, getitem_2350, getitem_2351, getitem_2352, getitem_2353, getitem_2354, getitem_2355, getitem_2356, getitem_2357, getitem_2358, getitem_2359, getitem_2360, getitem_2361, getitem_2362, getitem_2363, getitem_2364, getitem_2365, getitem_2366, getitem_2367], [getitem_2072, getitem_2073, getitem_2074, getitem_2075, getitem_2076, getitem_2077, getitem_2078, getitem_2079, getitem_2080, getitem_2081, getitem_2082, getitem_2083, getitem_2084, getitem_2085, getitem_2086, getitem_2087, getitem_2088, getitem_2089, getitem_2090, getitem_2091, getitem_2092, getitem_2093, getitem_2094, getitem_2095, getitem_2096, getitem_2097, getitem_2098, getitem_2099, getitem_2100, getitem_2101, getitem_2102, getitem_2103, getitem_2104, getitem_2105, getitem_2106, getitem_2107, getitem_2108, getitem_2109, getitem_2110, getitem_2111, getitem_2112, getitem_2113, getitem_2114, getitem_2115, getitem_2116, getitem_2117, getitem_2118, getitem_2119, getitem_2120, getitem_2121, getitem_2122, getitem_2123, getitem_2124, getitem_2125, getitem_2126, getitem_2127, getitem_2128, getitem_2129, getitem_2130, getitem_2131, getitem_2132, getitem_2133, getitem_2134, getitem_2135, getitem_2136, getitem_2137, getitem_2138, getitem_2139, getitem_2140, getitem_2141, getitem_2142, getitem_2143, getitem_2144, getitem_2145, getitem_2146, getitem_2147, getitem_2148, getitem_2149, getitem_2150, getitem_2151, getitem_2152, getitem_2153, getitem_2154, getitem_2155, getitem_2156, getitem_2157, getitem_2158, getitem_2159, getitem_2160, getitem_2161, getitem_2162, getitem_2163, getitem_2164, getitem_2165, getitem_2166, getitem_2167, getitem_2168, getitem_2169, getitem_2170, getitem_2171, getitem_2172, getitem_2173, getitem_2174, getitem_2175, getitem_2176, getitem_2177, getitem_2178, getitem_2179, getitem_2180, getitem_2181, getitem_2182, getitem_2183, getitem_2184, getitem_2185, getitem_2186, getitem_2187, getitem_2188, getitem_2189, getitem_2190, getitem_2191, getitem_2192, getitem_2193, getitem_2194, getitem_2195, getitem_2196, getitem_2197, getitem_2198, getitem_2199, getitem_2200, getitem_2201, getitem_2202, getitem_2203, getitem_2204, getitem_2205, getitem_2206, getitem_2207, getitem_2208, getitem_2209, getitem_2210, getitem_2211, getitem_2212, getitem_2213, getitem_2214, getitem_2215, getitem_2216, getitem_2217, 
getitem_2218, getitem_2219]); getitem_2220 = getitem_2221 = getitem_2222 = getitem_2223 = getitem_2224 = getitem_2225 = getitem_2226 = getitem_2227 = getitem_2228 = getitem_2229 = getitem_2230 = getitem_2231 = getitem_2232 = getitem_2233 = getitem_2234 = getitem_2235 = getitem_2236 = getitem_2237 = getitem_2238 = getitem_2239 = getitem_2240 = getitem_2241 = getitem_2242 = getitem_2243 = getitem_2244 = getitem_2245 = getitem_2246 = getitem_2247 = getitem_2248 = getitem_2249 = getitem_2250 = getitem_2251 = getitem_2252 = getitem_2253 = getitem_2254 = getitem_2255 = getitem_2256 = getitem_2257 = getitem_2258 = getitem_2259 = getitem_2260 = getitem_2261 = getitem_2262 = getitem_2263 = getitem_2264 = getitem_2265 = getitem_2266 = getitem_2267 = getitem_2268 = getitem_2269 = getitem_2270 = getitem_2271 = getitem_2272 = getitem_2273 = getitem_2274 = getitem_2275 = getitem_2276 = getitem_2277 = getitem_2278 = getitem_2279 = getitem_2280 = getitem_2281 = getitem_2282 = getitem_2283 = getitem_2284 = getitem_2285 = getitem_2286 = getitem_2287 = getitem_2288 = getitem_2289 = getitem_2290 = getitem_2291 = getitem_2292 = getitem_2293 = getitem_2294 = getitem_2295 = getitem_2296 = getitem_2297 = getitem_2298 = getitem_2299 = getitem_2300 = getitem_2301 = getitem_2302 = getitem_2303 = getitem_2304 = getitem_2305 = getitem_2306 = getitem_2307 = getitem_2308 = getitem_2309 = getitem_2310 = getitem_2311 = getitem_2312 = getitem_2313 = getitem_2314 = getitem_2315 = getitem_2316 = getitem_2317 = getitem_2318 = getitem_2319 = getitem_2320 = getitem_2321 = getitem_2322 = getitem_2323 = getitem_2324 = getitem_2325 = getitem_2326 = getitem_2327 = getitem_2328 = getitem_2329 = getitem_2330 = getitem_2331 = getitem_2332 = getitem_2333 = getitem_2334 = getitem_2335 = getitem_2336 = getitem_2337 = getitem_2338 = getitem_2339 = getitem_2340 = getitem_2341 = getitem_2342 = getitem_2343 = getitem_2344 = getitem_2345 = getitem_2346 = getitem_2347 = getitem_2348 = getitem_2349 = getitem_2350 = getitem_2351 = getitem_2352 = getitem_2353 = getitem_2354 = getitem_2355 = getitem_2356 = getitem_2357 = getitem_2358 = getitem_2359 = getitem_2360 = getitem_2361 = getitem_2362 = getitem_2363 = getitem_2364 = getitem_2365 = getitem_2366 = getitem_2367 = getitem_2072 = getitem_2073 = getitem_2074 = getitem_2075 = getitem_2076 = getitem_2077 = getitem_2078 = getitem_2079 = getitem_2080 = getitem_2081 = getitem_2082 = getitem_2083 = getitem_2084 = getitem_2085 = getitem_2086 = getitem_2087 = getitem_2088 = getitem_2089 = getitem_2090 = getitem_2091 = getitem_2092 = getitem_2093 = getitem_2094 = getitem_2095 = getitem_2096 = getitem_2097 = getitem_2098 = getitem_2099 = getitem_2100 = getitem_2101 = getitem_2102 = getitem_2103 = getitem_2104 = getitem_2105 = getitem_2106 = getitem_2107 = getitem_2108 = getitem_2109 = getitem_2110 = getitem_2111 = getitem_2112 = getitem_2113 = getitem_2114 = getitem_2115 = getitem_2116 = getitem_2117 = getitem_2118 = getitem_2119 = getitem_2120 = getitem_2121 = getitem_2122 = getitem_2123 = getitem_2124 = getitem_2125 = getitem_2126 = getitem_2127 = getitem_2128 = getitem_2129 = getitem_2130 = getitem_2131 = getitem_2132 = getitem_2133 = getitem_2134 = getitem_2135 = getitem_2136 = getitem_2137 = getitem_2138 = getitem_2139 = getitem_2140 = getitem_2141 = getitem_2142 = getitem_2143 = getitem_2144 = getitem_2145 = getitem_2146 = getitem_2147 = getitem_2148 = getitem_2149 = getitem_2150 = getitem_2151 = getitem_2152 = getitem_2153 = getitem_2154 = getitem_2155 = getitem_2156 = getitem_2157 = getitem_2158 = 
getitem_2159 = getitem_2160 = getitem_2161 = getitem_2162 = getitem_2163 = getitem_2164 = getitem_2165 = getitem_2166 = getitem_2167 = getitem_2168 = getitem_2169 = getitem_2170 = getitem_2171 = getitem_2172 = getitem_2173 = getitem_2174 = getitem_2175 = getitem_2176 = getitem_2177 = getitem_2178 = getitem_2179 = getitem_2180 = getitem_2181 = getitem_2182 = getitem_2183 = getitem_2184 = getitem_2185 = getitem_2186 = getitem_2187 = getitem_2188 = getitem_2189 = getitem_2190 = getitem_2191 = getitem_2192 = getitem_2193 = getitem_2194 = getitem_2195 = getitem_2196 = getitem_2197 = getitem_2198 = getitem_2199 = getitem_2200 = getitem_2201 = getitem_2202 = getitem_2203 = getitem_2204 = getitem_2205 = getitem_2206 = getitem_2207 = getitem_2208 = getitem_2209 = getitem_2210 = getitem_2211 = getitem_2212 = getitem_2213 = getitem_2214 = getitem_2215 = getitem_2216 = getitem_2217 = getitem_2218 = getitem_2219 = None\n getitem_2368 = _foreach_div_1[0]\n getitem_2369 = _foreach_div_1[1]\n getitem_2370 = _foreach_div_1[2]\n getitem_2371 = _foreach_div_1[3]\n getitem_2372 = _foreach_div_1[4]\n getitem_2373 = _foreach_div_1[5]\n getitem_2374 = _foreach_div_1[6]\n getitem_2375 = _foreach_div_1[7]\n getitem_2376 = _foreach_div_1[8]\n getitem_2377 = _foreach_div_1[9]\n getitem_2378 = _foreach_div_1[10]\n getitem_2379 = _foreach_div_1[11]\n getitem_2380 = _foreach_div_1[12]\n getitem_2381 = _foreach_div_1[13]\n getitem_2382 = _foreach_div_1[14]\n getitem_2383 = _foreach_div_1[15]\n getitem_2384 = _foreach_div_1[16]\n getitem_2385 = _foreach_div_1[17]\n getitem_2386 = _foreach_div_1[18]\n getitem_2387 = _foreach_div_1[19]\n getitem_2388 = _foreach_div_1[20]\n getitem_2389 = _foreach_div_1[21]\n getitem_2390 = _foreach_div_1[22]\n getitem_2391 = _foreach_div_1[23]\n getitem_2392 = _foreach_div_1[24]\n getitem_2393 = _foreach_div_1[25]\n getitem_2394 = _foreach_div_1[26]\n getitem_2395 = _foreach_div_1[27]\n getitem_2396 = _foreach_div_1[28]\n getitem_2397 = _foreach_div_1[29]\n getitem_2398 = _foreach_div_1[30]\n getitem_2399 = _foreach_div_1[31]\n getitem_2400 = _foreach_div_1[32]\n getitem_2401 = _foreach_div_1[33]\n getitem_2402 = _foreach_div_1[34]\n getitem_2403 = _foreach_div_1[35]\n getitem_2404 = _foreach_div_1[36]\n getitem_2405 = _foreach_div_1[37]\n getitem_2406 = _foreach_div_1[38]\n getitem_2407 = _foreach_div_1[39]\n getitem_2408 = _foreach_div_1[40]\n getitem_2409 = _foreach_div_1[41]\n getitem_2410 = _foreach_div_1[42]\n getitem_2411 = _foreach_div_1[43]\n getitem_2412 = _foreach_div_1[44]\n getitem_2413 = _foreach_div_1[45]\n getitem_2414 = _foreach_div_1[46]\n getitem_2415 = _foreach_div_1[47]\n getitem_2416 = _foreach_div_1[48]\n getitem_2417 = _foreach_div_1[49]\n getitem_2418 = _foreach_div_1[50]\n getitem_2419 = _foreach_div_1[51]\n getitem_2420 = _foreach_div_1[52]\n getitem_2421 = _foreach_div_1[53]\n getitem_2422 = _foreach_div_1[54]\n getitem_2423 = _foreach_div_1[55]\n getitem_2424 = _foreach_div_1[56]\n getitem_2425 = _foreach_div_1[57]\n getitem_2426 = _foreach_div_1[58]\n getitem_2427 = _foreach_div_1[59]\n getitem_2428 = _foreach_div_1[60]\n getitem_2429 = _foreach_div_1[61]\n getitem_2430 = _foreach_div_1[62]\n getitem_2431 = _foreach_div_1[63]\n getitem_2432 = _foreach_div_1[64]\n getitem_2433 = _foreach_div_1[65]\n getitem_2434 = _foreach_div_1[66]\n getitem_2435 = _foreach_div_1[67]\n getitem_2436 = _foreach_div_1[68]\n getitem_2437 = _foreach_div_1[69]\n getitem_2438 = _foreach_div_1[70]\n getitem_2439 = _foreach_div_1[71]\n getitem_2440 = _foreach_div_1[72]\n getitem_2441 = 
_foreach_div_1[73]\n getitem_2442 = _foreach_div_1[74]\n getitem_2443 = _foreach_div_1[75]\n getitem_2444 = _foreach_div_1[76]\n getitem_2445 = _foreach_div_1[77]\n getitem_2446 = _foreach_div_1[78]\n getitem_2447 = _foreach_div_1[79]\n getitem_2448 = _foreach_div_1[80]\n getitem_2449 = _foreach_div_1[81]\n getitem_2450 = _foreach_div_1[82]\n getitem_2451 = _foreach_div_1[83]\n getitem_2452 = _foreach_div_1[84]\n getitem_2453 = _foreach_div_1[85]\n getitem_2454 = _foreach_div_1[86]\n getitem_2455 = _foreach_div_1[87]\n getitem_2456 = _foreach_div_1[88]\n getitem_2457 = _foreach_div_1[89]\n getitem_2458 = _foreach_div_1[90]\n getitem_2459 = _foreach_div_1[91]\n getitem_2460 = _foreach_div_1[92]\n getitem_2461 = _foreach_div_1[93]\n getitem_2462 = _foreach_div_1[94]\n getitem_2463 = _foreach_div_1[95]\n getitem_2464 = _foreach_div_1[96]\n getitem_2465 = _foreach_div_1[97]\n getitem_2466 = _foreach_div_1[98]\n getitem_2467 = _foreach_div_1[99]\n getitem_2468 = _foreach_div_1[100]\n getitem_2469 = _foreach_div_1[101]\n getitem_2470 = _foreach_div_1[102]\n getitem_2471 = _foreach_div_1[103]\n getitem_2472 = _foreach_div_1[104]\n getitem_2473 = _foreach_div_1[105]\n getitem_2474 = _foreach_div_1[106]\n getitem_2475 = _foreach_div_1[107]\n getitem_2476 = _foreach_div_1[108]\n getitem_2477 = _foreach_div_1[109]\n getitem_2478 = _foreach_div_1[110]\n getitem_2479 = _foreach_div_1[111]\n getitem_2480 = _foreach_div_1[112]\n getitem_2481 = _foreach_div_1[113]\n getitem_2482 = _foreach_div_1[114]\n getitem_2483 = _foreach_div_1[115]\n getitem_2484 = _foreach_div_1[116]\n getitem_2485 = _foreach_div_1[117]\n getitem_2486 = _foreach_div_1[118]\n getitem_2487 = _foreach_div_1[119]\n getitem_2488 = _foreach_div_1[120]\n getitem_2489 = _foreach_div_1[121]\n getitem_2490 = _foreach_div_1[122]\n getitem_2491 = _foreach_div_1[123]\n getitem_2492 = _foreach_div_1[124]\n getitem_2493 = _foreach_div_1[125]\n getitem_2494 = _foreach_div_1[126]\n getitem_2495 = _foreach_div_1[127]\n getitem_2496 = _foreach_div_1[128]\n getitem_2497 = _foreach_div_1[129]\n getitem_2498 = _foreach_div_1[130]\n getitem_2499 = _foreach_div_1[131]\n getitem_2500 = _foreach_div_1[132]\n getitem_2501 = _foreach_div_1[133]\n getitem_2502 = _foreach_div_1[134]\n getitem_2503 = _foreach_div_1[135]\n getitem_2504 = _foreach_div_1[136]\n getitem_2505 = _foreach_div_1[137]\n getitem_2506 = _foreach_div_1[138]\n getitem_2507 = _foreach_div_1[139]\n getitem_2508 = _foreach_div_1[140]\n getitem_2509 = _foreach_div_1[141]\n getitem_2510 = _foreach_div_1[142]\n getitem_2511 = _foreach_div_1[143]\n getitem_2512 = _foreach_div_1[144]\n getitem_2513 = _foreach_div_1[145]\n getitem_2514 = _foreach_div_1[146]\n getitem_2515 = _foreach_div_1[147]; _foreach_div_1 = None\n _foreach_add_3 = torch.ops.aten._foreach_add.Scalar([getitem_2368, getitem_2369, getitem_2370, getitem_2371, getitem_2372, getitem_2373, getitem_2374, getitem_2375, getitem_2376, getitem_2377, getitem_2378, getitem_2379, getitem_2380, getitem_2381, getitem_2382, getitem_2383, getitem_2384, getitem_2385, getitem_2386, getitem_2387, getitem_2388, getitem_2389, getitem_2390, getitem_2391, getitem_2392, getitem_2393, getitem_2394, getitem_2395, getitem_2396, getitem_2397, getitem_2398, getitem_2399, getitem_2400, getitem_2401, getitem_2402, getitem_2403, getitem_2404, getitem_2405, getitem_2406, getitem_2407, getitem_2408, getitem_2409, getitem_2410, getitem_2411, getitem_2412, getitem_2413, getitem_2414, getitem_2415, getitem_2416, getitem_2417, getitem_2418, getitem_2419, getitem_2420, 
getitem_2421, getitem_2422, getitem_2423, getitem_2424, getitem_2425, getitem_2426, getitem_2427, getitem_2428, getitem_2429, getitem_2430, getitem_2431, getitem_2432, getitem_2433, getitem_2434, getitem_2435, getitem_2436, getitem_2437, getitem_2438, getitem_2439, getitem_2440, getitem_2441, getitem_2442, getitem_2443, getitem_2444, getitem_2445, getitem_2446, getitem_2447, getitem_2448, getitem_2449, getitem_2450, getitem_2451, getitem_2452, getitem_2453, getitem_2454, getitem_2455, getitem_2456, getitem_2457, getitem_2458, getitem_2459, getitem_2460, getitem_2461, getitem_2462, getitem_2463, getitem_2464, getitem_2465, getitem_2466, getitem_2467, getitem_2468, getitem_2469, getitem_2470, getitem_2471, getitem_2472, getitem_2473, getitem_2474, getitem_2475, getitem_2476, getitem_2477, getitem_2478, getitem_2479, getitem_2480, getitem_2481, getitem_2482, getitem_2483, getitem_2484, getitem_2485, getitem_2486, getitem_2487, getitem_2488, getitem_2489, getitem_2490, getitem_2491, getitem_2492, getitem_2493, getitem_2494, getitem_2495, getitem_2496, getitem_2497, getitem_2498, getitem_2499, getitem_2500, getitem_2501, getitem_2502, getitem_2503, getitem_2504, getitem_2505, getitem_2506, getitem_2507, getitem_2508, getitem_2509, getitem_2510, getitem_2511, getitem_2512, getitem_2513, getitem_2514, getitem_2515], 1e-08); getitem_2368 = getitem_2369 = getitem_2370 = getitem_2371 = getitem_2372 = getitem_2373 = getitem_2374 = getitem_2375 = getitem_2376 = getitem_2377 = getitem_2378 = getitem_2379 = getitem_2380 = getitem_2381 = getitem_2382 = getitem_2383 = getitem_2384 = getitem_2385 = getitem_2386 = getitem_2387 = getitem_2388 = getitem_2389 = getitem_2390 = getitem_2391 = getitem_2392 = getitem_2393 = getitem_2394 = getitem_2395 = getitem_2396 = getitem_2397 = getitem_2398 = getitem_2399 = getitem_2400 = getitem_2401 = getitem_2402 = getitem_2403 = getitem_2404 = getitem_2405 = getitem_2406 = getitem_2407 = getitem_2408 = getitem_2409 = getitem_2410 = getitem_2411 = getitem_2412 = getitem_2413 = getitem_2414 = getitem_2415 = getitem_2416 = getitem_2417 = getitem_2418 = getitem_2419 = getitem_2420 = getitem_2421 = getitem_2422 = getitem_2423 = getitem_2424 = getitem_2425 = getitem_2426 = getitem_2427 = getitem_2428 = getitem_2429 = getitem_2430 = getitem_2431 = getitem_2432 = getitem_2433 = getitem_2434 = getitem_2435 = getitem_2436 = getitem_2437 = getitem_2438 = getitem_2439 = getitem_2440 = getitem_2441 = getitem_2442 = getitem_2443 = getitem_2444 = getitem_2445 = getitem_2446 = getitem_2447 = getitem_2448 = getitem_2449 = getitem_2450 = getitem_2451 = getitem_2452 = getitem_2453 = getitem_2454 = getitem_2455 = getitem_2456 = getitem_2457 = getitem_2458 = getitem_2459 = getitem_2460 = getitem_2461 = getitem_2462 = getitem_2463 = getitem_2464 = getitem_2465 = getitem_2466 = getitem_2467 = getitem_2468 = getitem_2469 = getitem_2470 = getitem_2471 = getitem_2472 = getitem_2473 = getitem_2474 = getitem_2475 = getitem_2476 = getitem_2477 = getitem_2478 = getitem_2479 = getitem_2480 = getitem_2481 = getitem_2482 = getitem_2483 = getitem_2484 = getitem_2485 = getitem_2486 = getitem_2487 = getitem_2488 = getitem_2489 = getitem_2490 = getitem_2491 = getitem_2492 = getitem_2493 = getitem_2494 = getitem_2495 = getitem_2496 = getitem_2497 = getitem_2498 = getitem_2499 = getitem_2500 = getitem_2501 = getitem_2502 = getitem_2503 = getitem_2504 = getitem_2505 = getitem_2506 = getitem_2507 = getitem_2508 = getitem_2509 = getitem_2510 = getitem_2511 = getitem_2512 = getitem_2513 = getitem_2514 = 
getitem_2515 = None\n getitem_2516 = _foreach_add_3[0]\n getitem_2517 = _foreach_add_3[1]\n getitem_2518 = _foreach_add_3[2]\n getitem_2519 = _foreach_add_3[3]\n getitem_2520 = _foreach_add_3[4]\n getitem_2521 = _foreach_add_3[5]\n getitem_2522 = _foreach_add_3[6]\n getitem_2523 = _foreach_add_3[7]\n getitem_2524 = _foreach_add_3[8]\n getitem_2525 = _foreach_add_3[9]\n getitem_2526 = _foreach_add_3[10]\n getitem_2527 = _foreach_add_3[11]\n getitem_2528 = _foreach_add_3[12]\n getitem_2529 = _foreach_add_3[13]\n getitem_2530 = _foreach_add_3[14]\n getitem_2531 = _foreach_add_3[15]\n getitem_2532 = _foreach_add_3[16]\n getitem_2533 = _foreach_add_3[17]\n getitem_2534 = _foreach_add_3[18]\n getitem_2535 = _foreach_add_3[19]\n getitem_2536 = _foreach_add_3[20]\n getitem_2537 = _foreach_add_3[21]\n getitem_2538 = _foreach_add_3[22]\n getitem_2539 = _foreach_add_3[23]\n getitem_2540 = _foreach_add_3[24]\n getitem_2541 = _foreach_add_3[25]\n getitem_2542 = _foreach_add_3[26]\n getitem_2543 = _foreach_add_3[27]\n getitem_2544 = _foreach_add_3[28]\n getitem_2545 = _foreach_add_3[29]\n getitem_2546 = _foreach_add_3[30]\n getitem_2547 = _foreach_add_3[31]\n getitem_2548 = _foreach_add_3[32]\n getitem_2549 = _foreach_add_3[33]\n getitem_2550 = _foreach_add_3[34]\n getitem_2551 = _foreach_add_3[35]\n getitem_2552 = _foreach_add_3[36]\n getitem_2553 = _foreach_add_3[37]\n getitem_2554 = _foreach_add_3[38]\n getitem_2555 = _foreach_add_3[39]\n getitem_2556 = _foreach_add_3[40]\n getitem_2557 = _foreach_add_3[41]\n getitem_2558 = _foreach_add_3[42]\n getitem_2559 = _foreach_add_3[43]\n getitem_2560 = _foreach_add_3[44]\n getitem_2561 = _foreach_add_3[45]\n getitem_2562 = _foreach_add_3[46]\n getitem_2563 = _foreach_add_3[47]\n getitem_2564 = _foreach_add_3[48]\n getitem_2565 = _foreach_add_3[49]\n getitem_2566 = _foreach_add_3[50]\n getitem_2567 = _foreach_add_3[51]\n getitem_2568 = _foreach_add_3[52]\n getitem_2569 = _foreach_add_3[53]\n getitem_2570 = _foreach_add_3[54]\n getitem_2571 = _foreach_add_3[55]\n getitem_2572 = _foreach_add_3[56]\n getitem_2573 = _foreach_add_3[57]\n getitem_2574 = _foreach_add_3[58]\n getitem_2575 = _foreach_add_3[59]\n getitem_2576 = _foreach_add_3[60]\n getitem_2577 = _foreach_add_3[61]\n getitem_2578 = _foreach_add_3[62]\n getitem_2579 = _foreach_add_3[63]\n getitem_2580 = _foreach_add_3[64]\n getitem_2581 = _foreach_add_3[65]\n getitem_2582 = _foreach_add_3[66]\n getitem_2583 = _foreach_add_3[67]\n getitem_2584 = _foreach_add_3[68]\n getitem_2585 = _foreach_add_3[69]\n getitem_2586 = _foreach_add_3[70]\n getitem_2587 = _foreach_add_3[71]\n getitem_2588 = _foreach_add_3[72]\n getitem_2589 = _foreach_add_3[73]\n getitem_2590 = _foreach_add_3[74]\n getitem_2591 = _foreach_add_3[75]\n getitem_2592 = _foreach_add_3[76]\n getitem_2593 = _foreach_add_3[77]\n getitem_2594 = _foreach_add_3[78]\n getitem_2595 = _foreach_add_3[79]\n getitem_2596 = _foreach_add_3[80]\n getitem_2597 = _foreach_add_3[81]\n getitem_2598 = _foreach_add_3[82]\n getitem_2599 = _foreach_add_3[83]\n getitem_2600 = _foreach_add_3[84]\n getitem_2601 = _foreach_add_3[85]\n getitem_2602 = _foreach_add_3[86]\n getitem_2603 = _foreach_add_3[87]\n getitem_2604 = _foreach_add_3[88]\n getitem_2605 = _foreach_add_3[89]\n getitem_2606 = _foreach_add_3[90]\n getitem_2607 = _foreach_add_3[91]\n getitem_2608 = _foreach_add_3[92]\n getitem_2609 = _foreach_add_3[93]\n getitem_2610 = _foreach_add_3[94]\n getitem_2611 = _foreach_add_3[95]\n getitem_2612 = _foreach_add_3[96]\n getitem_2613 = _foreach_add_3[97]\n getitem_2614 = 
_foreach_add_3[98]\n getitem_2615 = _foreach_add_3[99]\n getitem_2616 = _foreach_add_3[100]\n getitem_2617 = _foreach_add_3[101]\n getitem_2618 = _foreach_add_3[102]\n getitem_2619 = _foreach_add_3[103]\n getitem_2620 = _foreach_add_3[104]\n getitem_2621 = _foreach_add_3[105]\n getitem_2622 = _foreach_add_3[106]\n getitem_2623 = _foreach_add_3[107]\n getitem_2624 = _foreach_add_3[108]\n getitem_2625 = _foreach_add_3[109]\n getitem_2626 = _foreach_add_3[110]\n getitem_2627 = _foreach_add_3[111]\n getitem_2628 = _foreach_add_3[112]\n getitem_2629 = _foreach_add_3[113]\n getitem_2630 = _foreach_add_3[114]\n getitem_2631 = _foreach_add_3[115]\n getitem_2632 = _foreach_add_3[116]\n getitem_2633 = _foreach_add_3[117]\n getitem_2634 = _foreach_add_3[118]\n getitem_2635 = _foreach_add_3[119]\n getitem_2636 = _foreach_add_3[120]\n getitem_2637 = _foreach_add_3[121]\n getitem_2638 = _foreach_add_3[122]\n getitem_2639 = _foreach_add_3[123]\n getitem_2640 = _foreach_add_3[124]\n getitem_2641 = _foreach_add_3[125]\n getitem_2642 = _foreach_add_3[126]\n getitem_2643 = _foreach_add_3[127]\n getitem_2644 = _foreach_add_3[128]\n getitem_2645 = _foreach_add_3[129]\n getitem_2646 = _foreach_add_3[130]\n getitem_2647 = _foreach_add_3[131]\n getitem_2648 = _foreach_add_3[132]\n getitem_2649 = _foreach_add_3[133]\n getitem_2650 = _foreach_add_3[134]\n getitem_2651 = _foreach_add_3[135]\n getitem_2652 = _foreach_add_3[136]\n getitem_2653 = _foreach_add_3[137]\n getitem_2654 = _foreach_add_3[138]\n getitem_2655 = _foreach_add_3[139]\n getitem_2656 = _foreach_add_3[140]\n getitem_2657 = _foreach_add_3[141]\n getitem_2658 = _foreach_add_3[142]\n getitem_2659 = _foreach_add_3[143]\n getitem_2660 = _foreach_add_3[144]\n getitem_2661 = _foreach_add_3[145]\n getitem_2662 = _foreach_add_3[146]\n getitem_2663 = _foreach_add_3[147]; _foreach_add_3 = None\n _foreach_div_2 = torch.ops.aten._foreach_div.List([getitem_2516, getitem_2517, getitem_2518, getitem_2519, getitem_2520, getitem_2521, getitem_2522, getitem_2523, getitem_2524, getitem_2525, getitem_2526, getitem_2527, getitem_2528, getitem_2529, getitem_2530, getitem_2531, getitem_2532, getitem_2533, getitem_2534, getitem_2535, getitem_2536, getitem_2537, getitem_2538, getitem_2539, getitem_2540, getitem_2541, getitem_2542, getitem_2543, getitem_2544, getitem_2545, getitem_2546, getitem_2547, getitem_2548, getitem_2549, getitem_2550, getitem_2551, getitem_2552, getitem_2553, getitem_2554, getitem_2555, getitem_2556, getitem_2557, getitem_2558, getitem_2559, getitem_2560, getitem_2561, getitem_2562, getitem_2563, getitem_2564, getitem_2565, getitem_2566, getitem_2567, getitem_2568, getitem_2569, getitem_2570, getitem_2571, getitem_2572, getitem_2573, getitem_2574, getitem_2575, getitem_2576, getitem_2577, getitem_2578, getitem_2579, getitem_2580, getitem_2581, getitem_2582, getitem_2583, getitem_2584, getitem_2585, getitem_2586, getitem_2587, getitem_2588, getitem_2589, getitem_2590, getitem_2591, getitem_2592, getitem_2593, getitem_2594, getitem_2595, getitem_2596, getitem_2597, getitem_2598, getitem_2599, getitem_2600, getitem_2601, getitem_2602, getitem_2603, getitem_2604, getitem_2605, getitem_2606, getitem_2607, getitem_2608, getitem_2609, getitem_2610, getitem_2611, getitem_2612, getitem_2613, getitem_2614, getitem_2615, getitem_2616, getitem_2617, getitem_2618, getitem_2619, getitem_2620, getitem_2621, getitem_2622, getitem_2623, getitem_2624, getitem_2625, getitem_2626, getitem_2627, getitem_2628, getitem_2629, getitem_2630, getitem_2631, getitem_2632, 
getitem_2633, getitem_2634, getitem_2635, getitem_2636, getitem_2637, getitem_2638, getitem_2639, getitem_2640, getitem_2641, getitem_2642, getitem_2643, getitem_2644, getitem_2645, getitem_2646, getitem_2647, getitem_2648, getitem_2649, getitem_2650, getitem_2651, getitem_2652, getitem_2653, getitem_2654, getitem_2655, getitem_2656, getitem_2657, getitem_2658, getitem_2659, getitem_2660, getitem_2661, getitem_2662, getitem_2663], [getitem_1924, getitem_1925, getitem_1926, getitem_1927, getitem_1928, getitem_1929, getitem_1930, getitem_1931, getitem_1932, getitem_1933, getitem_1934, getitem_1935, getitem_1936, getitem_1937, getitem_1938, getitem_1939, getitem_1940, getitem_1941, getitem_1942, getitem_1943, getitem_1944, getitem_1945, getitem_1946, getitem_1947, getitem_1948, getitem_1949, getitem_1950, getitem_1951, getitem_1952, getitem_1953, getitem_1954, getitem_1955, getitem_1956, getitem_1957, getitem_1958, getitem_1959, getitem_1960, getitem_1961, getitem_1962, getitem_1963, getitem_1964, getitem_1965, getitem_1966, getitem_1967, getitem_1968, getitem_1969, getitem_1970, getitem_1971, getitem_1972, getitem_1973, getitem_1974, getitem_1975, getitem_1976, getitem_1977, getitem_1978, getitem_1979, getitem_1980, getitem_1981, getitem_1982, getitem_1983, getitem_1984, getitem_1985, getitem_1986, getitem_1987, getitem_1988, getitem_1989, getitem_1990, getitem_1991, getitem_1992, getitem_1993, getitem_1994, getitem_1995, getitem_1996, getitem_1997, getitem_1998, getitem_1999, getitem_2000, getitem_2001, getitem_2002, getitem_2003, getitem_2004, getitem_2005, getitem_2006, getitem_2007, getitem_2008, getitem_2009, getitem_2010, getitem_2011, getitem_2012, getitem_2013, getitem_2014, getitem_2015, getitem_2016, getitem_2017, getitem_2018, getitem_2019, getitem_2020, getitem_2021, getitem_2022, getitem_2023, getitem_2024, getitem_2025, getitem_2026, getitem_2027, getitem_2028, getitem_2029, getitem_2030, getitem_2031, getitem_2032, getitem_2033, getitem_2034, getitem_2035, getitem_2036, getitem_2037, getitem_2038, getitem_2039, getitem_2040, getitem_2041, getitem_2042, getitem_2043, getitem_2044, getitem_2045, getitem_2046, getitem_2047, getitem_2048, getitem_2049, getitem_2050, getitem_2051, getitem_2052, getitem_2053, getitem_2054, getitem_2055, getitem_2056, getitem_2057, getitem_2058, getitem_2059, getitem_2060, getitem_2061, getitem_2062, getitem_2063, getitem_2064, getitem_2065, getitem_2066, getitem_2067, getitem_2068, getitem_2069, getitem_2070, getitem_2071]); getitem_2516 = getitem_2517 = getitem_2518 = getitem_2519 = getitem_2520 = getitem_2521 = getitem_2522 = getitem_2523 = getitem_2524 = getitem_2525 = getitem_2526 = getitem_2527 = getitem_2528 = getitem_2529 = getitem_2530 = getitem_2531 = getitem_2532 = getitem_2533 = getitem_2534 = getitem_2535 = getitem_2536 = getitem_2537 = getitem_2538 = getitem_2539 = getitem_2540 = getitem_2541 = getitem_2542 = getitem_2543 = getitem_2544 = getitem_2545 = getitem_2546 = getitem_2547 = getitem_2548 = getitem_2549 = getitem_2550 = getitem_2551 = getitem_2552 = getitem_2553 = getitem_2554 = getitem_2555 = getitem_2556 = getitem_2557 = getitem_2558 = getitem_2559 = getitem_2560 = getitem_2561 = getitem_2562 = getitem_2563 = getitem_2564 = getitem_2565 = getitem_2566 = getitem_2567 = getitem_2568 = getitem_2569 = getitem_2570 = getitem_2571 = getitem_2572 = getitem_2573 = getitem_2574 = getitem_2575 = getitem_2576 = getitem_2577 = getitem_2578 = getitem_2579 = getitem_2580 = getitem_2581 = getitem_2582 = getitem_2583 = getitem_2584 = 
getitem_2585 = getitem_2586 = getitem_2587 = getitem_2588 = getitem_2589 = getitem_2590 = getitem_2591 = getitem_2592 = getitem_2593 = getitem_2594 = getitem_2595 = getitem_2596 = getitem_2597 = getitem_2598 = getitem_2599 = getitem_2600 = getitem_2601 = getitem_2602 = getitem_2603 = getitem_2604 = getitem_2605 = getitem_2606 = getitem_2607 = getitem_2608 = getitem_2609 = getitem_2610 = getitem_2611 = getitem_2612 = getitem_2613 = getitem_2614 = getitem_2615 = getitem_2616 = getitem_2617 = getitem_2618 = getitem_2619 = getitem_2620 = getitem_2621 = getitem_2622 = getitem_2623 = getitem_2624 = getitem_2625 = getitem_2626 = getitem_2627 = getitem_2628 = getitem_2629 = getitem_2630 = getitem_2631 = getitem_2632 = getitem_2633 = getitem_2634 = getitem_2635 = getitem_2636 = getitem_2637 = getitem_2638 = getitem_2639 = getitem_2640 = getitem_2641 = getitem_2642 = getitem_2643 = getitem_2644 = getitem_2645 = getitem_2646 = getitem_2647 = getitem_2648 = getitem_2649 = getitem_2650 = getitem_2651 = getitem_2652 = getitem_2653 = getitem_2654 = getitem_2655 = getitem_2656 = getitem_2657 = getitem_2658 = getitem_2659 = getitem_2660 = getitem_2661 = getitem_2662 = getitem_2663 = getitem_1924 = getitem_1925 = getitem_1926 = getitem_1927 = getitem_1928 = getitem_1929 = getitem_1930 = getitem_1931 = getitem_1932 = getitem_1933 = getitem_1934 = getitem_1935 = getitem_1936 = getitem_1937 = getitem_1938 = getitem_1939 = getitem_1940 = getitem_1941 = getitem_1942 = getitem_1943 = getitem_1944 = getitem_1945 = getitem_1946 = getitem_1947 = getitem_1948 = getitem_1949 = getitem_1950 = getitem_1951 = getitem_1952 = getitem_1953 = getitem_1954 = getitem_1955 = getitem_1956 = getitem_1957 = getitem_1958 = getitem_1959 = getitem_1960 = getitem_1961 = getitem_1962 = getitem_1963 = getitem_1964 = getitem_1965 = getitem_1966 = getitem_1967 = getitem_1968 = getitem_1969 = getitem_1970 = getitem_1971 = getitem_1972 = getitem_1973 = getitem_1974 = getitem_1975 = getitem_1976 = getitem_1977 = getitem_1978 = getitem_1979 = getitem_1980 = getitem_1981 = getitem_1982 = getitem_1983 = getitem_1984 = getitem_1985 = getitem_1986 = getitem_1987 = getitem_1988 = getitem_1989 = getitem_1990 = getitem_1991 = getitem_1992 = getitem_1993 = getitem_1994 = getitem_1995 = getitem_1996 = getitem_1997 = getitem_1998 = getitem_1999 = getitem_2000 = getitem_2001 = getitem_2002 = getitem_2003 = getitem_2004 = getitem_2005 = getitem_2006 = getitem_2007 = getitem_2008 = getitem_2009 = getitem_2010 = getitem_2011 = getitem_2012 = getitem_2013 = getitem_2014 = getitem_2015 = getitem_2016 = getitem_2017 = getitem_2018 = getitem_2019 = getitem_2020 = getitem_2021 = getitem_2022 = getitem_2023 = getitem_2024 = getitem_2025 = getitem_2026 = getitem_2027 = getitem_2028 = getitem_2029 = getitem_2030 = getitem_2031 = getitem_2032 = getitem_2033 = getitem_2034 = getitem_2035 = getitem_2036 = getitem_2037 = getitem_2038 = getitem_2039 = getitem_2040 = getitem_2041 = getitem_2042 = getitem_2043 = getitem_2044 = getitem_2045 = getitem_2046 = getitem_2047 = getitem_2048 = getitem_2049 = getitem_2050 = getitem_2051 = getitem_2052 = getitem_2053 = getitem_2054 = getitem_2055 = getitem_2056 = getitem_2057 = getitem_2058 = getitem_2059 = getitem_2060 = getitem_2061 = getitem_2062 = getitem_2063 = getitem_2064 = getitem_2065 = getitem_2066 = getitem_2067 = getitem_2068 = getitem_2069 = getitem_2070 = getitem_2071 = None\n getitem_2664 = _foreach_div_2[0]\n getitem_2665 = _foreach_div_2[1]\n getitem_2666 = _foreach_div_2[2]\n getitem_2667 = _foreach_div_2[3]\n 
getitem_2668 = _foreach_div_2[4]\n getitem_2669 = _foreach_div_2[5]\n getitem_2670 = _foreach_div_2[6]\n getitem_2671 = _foreach_div_2[7]\n getitem_2672 = _foreach_div_2[8]\n getitem_2673 = _foreach_div_2[9]\n getitem_2674 = _foreach_div_2[10]\n getitem_2675 = _foreach_div_2[11]\n getitem_2676 = _foreach_div_2[12]\n getitem_2677 = _foreach_div_2[13]\n getitem_2678 = _foreach_div_2[14]\n getitem_2679 = _foreach_div_2[15]\n getitem_2680 = _foreach_div_2[16]\n getitem_2681 = _foreach_div_2[17]\n getitem_2682 = _foreach_div_2[18]\n getitem_2683 = _foreach_div_2[19]\n getitem_2684 = _foreach_div_2[20]\n getitem_2685 = _foreach_div_2[21]\n getitem_2686 = _foreach_div_2[22]\n getitem_2687 = _foreach_div_2[23]\n getitem_2688 = _foreach_div_2[24]\n getitem_2689 = _foreach_div_2[25]\n getitem_2690 = _foreach_div_2[26]\n getitem_2691 = _foreach_div_2[27]\n getitem_2692 = _foreach_div_2[28]\n getitem_2693 = _foreach_div_2[29]\n getitem_2694 = _foreach_div_2[30]\n getitem_2695 = _foreach_div_2[31]\n getitem_2696 = _foreach_div_2[32]\n getitem_2697 = _foreach_div_2[33]\n getitem_2698 = _foreach_div_2[34]\n getitem_2699 = _foreach_div_2[35]\n getitem_2700 = _foreach_div_2[36]\n getitem_2701 = _foreach_div_2[37]\n getitem_2702 = _foreach_div_2[38]\n getitem_2703 = _foreach_div_2[39]\n getitem_2704 = _foreach_div_2[40]\n getitem_2705 = _foreach_div_2[41]\n getitem_2706 = _foreach_div_2[42]\n getitem_2707 = _foreach_div_2[43]\n getitem_2708 = _foreach_div_2[44]\n getitem_2709 = _foreach_div_2[45]\n getitem_2710 = _foreach_div_2[46]\n getitem_2711 = _foreach_div_2[47]\n getitem_2712 = _foreach_div_2[48]\n getitem_2713 = _foreach_div_2[49]\n getitem_2714 = _foreach_div_2[50]\n getitem_2715 = _foreach_div_2[51]\n getitem_2716 = _foreach_div_2[52]\n getitem_2717 = _foreach_div_2[53]\n getitem_2718 = _foreach_div_2[54]\n getitem_2719 = _foreach_div_2[55]\n getitem_2720 = _foreach_div_2[56]\n getitem_2721 = _foreach_div_2[57]\n getitem_2722 = _foreach_div_2[58]\n getitem_2723 = _foreach_div_2[59]\n getitem_2724 = _foreach_div_2[60]\n getitem_2725 = _foreach_div_2[61]\n getitem_2726 = _foreach_div_2[62]\n getitem_2727 = _foreach_div_2[63]\n getitem_2728 = _foreach_div_2[64]\n getitem_2729 = _foreach_div_2[65]\n getitem_2730 = _foreach_div_2[66]\n getitem_2731 = _foreach_div_2[67]\n getitem_2732 = _foreach_div_2[68]\n getitem_2733 = _foreach_div_2[69]\n getitem_2734 = _foreach_div_2[70]\n getitem_2735 = _foreach_div_2[71]\n getitem_2736 = _foreach_div_2[72]\n getitem_2737 = _foreach_div_2[73]\n getitem_2738 = _foreach_div_2[74]\n getitem_2739 = _foreach_div_2[75]\n getitem_2740 = _foreach_div_2[76]\n getitem_2741 = _foreach_div_2[77]\n getitem_2742 = _foreach_div_2[78]\n getitem_2743 = _foreach_div_2[79]\n getitem_2744 = _foreach_div_2[80]\n getitem_2745 = _foreach_div_2[81]\n getitem_2746 = _foreach_div_2[82]\n getitem_2747 = _foreach_div_2[83]\n getitem_2748 = _foreach_div_2[84]\n getitem_2749 = _foreach_div_2[85]\n getitem_2750 = _foreach_div_2[86]\n getitem_2751 = _foreach_div_2[87]\n getitem_2752 = _foreach_div_2[88]\n getitem_2753 = _foreach_div_2[89]\n getitem_2754 = _foreach_div_2[90]\n getitem_2755 = _foreach_div_2[91]\n getitem_2756 = _foreach_div_2[92]\n getitem_2757 = _foreach_div_2[93]\n getitem_2758 = _foreach_div_2[94]\n getitem_2759 = _foreach_div_2[95]\n getitem_2760 = _foreach_div_2[96]\n getitem_2761 = _foreach_div_2[97]\n getitem_2762 = _foreach_div_2[98]\n getitem_2763 = _foreach_div_2[99]\n getitem_2764 = _foreach_div_2[100]\n getitem_2765 = _foreach_div_2[101]\n getitem_2766 = 
_foreach_div_2[102]\n getitem_2767 = _foreach_div_2[103]\n getitem_2768 = _foreach_div_2[104]\n getitem_2769 = _foreach_div_2[105]\n getitem_2770 = _foreach_div_2[106]\n getitem_2771 = _foreach_div_2[107]\n getitem_2772 = _foreach_div_2[108]\n getitem_2773 = _foreach_div_2[109]\n getitem_2774 = _foreach_div_2[110]\n getitem_2775 = _foreach_div_2[111]\n getitem_2776 = _foreach_div_2[112]\n getitem_2777 = _foreach_div_2[113]\n getitem_2778 = _foreach_div_2[114]\n getitem_2779 = _foreach_div_2[115]\n getitem_2780 = _foreach_div_2[116]\n getitem_2781 = _foreach_div_2[117]\n getitem_2782 = _foreach_div_2[118]\n getitem_2783 = _foreach_div_2[119]\n getitem_2784 = _foreach_div_2[120]\n getitem_2785 = _foreach_div_2[121]\n getitem_2786 = _foreach_div_2[122]\n getitem_2787 = _foreach_div_2[123]\n getitem_2788 = _foreach_div_2[124]\n getitem_2789 = _foreach_div_2[125]\n getitem_2790 = _foreach_div_2[126]\n getitem_2791 = _foreach_div_2[127]\n getitem_2792 = _foreach_div_2[128]\n getitem_2793 = _foreach_div_2[129]\n getitem_2794 = _foreach_div_2[130]\n getitem_2795 = _foreach_div_2[131]\n getitem_2796 = _foreach_div_2[132]\n getitem_2797 = _foreach_div_2[133]\n getitem_2798 = _foreach_div_2[134]\n getitem_2799 = _foreach_div_2[135]\n getitem_2800 = _foreach_div_2[136]\n getitem_2801 = _foreach_div_2[137]\n getitem_2802 = _foreach_div_2[138]\n getitem_2803 = _foreach_div_2[139]\n getitem_2804 = _foreach_div_2[140]\n getitem_2805 = _foreach_div_2[141]\n getitem_2806 = _foreach_div_2[142]\n getitem_2807 = _foreach_div_2[143]\n getitem_2808 = _foreach_div_2[144]\n getitem_2809 = _foreach_div_2[145]\n getitem_2810 = _foreach_div_2[146]\n getitem_2811 = _foreach_div_2[147]; _foreach_div_2 = None\n _foreach_div_3 = torch.ops.aten._foreach_div.List([getitem_444, getitem_445, getitem_446, getitem_447, getitem_448, getitem_449, getitem_450, getitem_451, getitem_452, getitem_453, getitem_454, getitem_455, getitem_456, getitem_457, getitem_458, getitem_459, getitem_460, getitem_461, getitem_462, getitem_463, getitem_464, getitem_465, getitem_466, getitem_467, getitem_468, getitem_469, getitem_470, getitem_471, getitem_472, getitem_473, getitem_474, getitem_475, getitem_476, getitem_477, getitem_478, getitem_479, getitem_480, getitem_481, getitem_482, getitem_483, getitem_484, getitem_485, getitem_486, getitem_487, getitem_488, getitem_489, getitem_490, getitem_491, getitem_492, getitem_493, getitem_494, getitem_495, getitem_496, getitem_497, getitem_498, getitem_499, getitem_500, getitem_501, getitem_502, getitem_503, getitem_504, getitem_505, getitem_506, getitem_507, getitem_508, getitem_509, getitem_510, getitem_511, getitem_512, getitem_513, getitem_514, getitem_515, getitem_516, getitem_517, getitem_518, getitem_519, getitem_520, getitem_521, getitem_522, getitem_523, getitem_524, getitem_525, getitem_526, getitem_527, getitem_528, getitem_529, getitem_530, getitem_531, getitem_532, getitem_533, getitem_534, getitem_535, getitem_536, getitem_537, getitem_538, getitem_539, getitem_540, getitem_541, getitem_542, getitem_543, getitem_544, getitem_545, getitem_546, getitem_547, getitem_548, getitem_549, getitem_550, getitem_551, getitem_552, getitem_553, getitem_554, getitem_555, getitem_556, getitem_557, getitem_558, getitem_559, getitem_560, getitem_561, getitem_562, getitem_563, getitem_564, getitem_565, getitem_566, getitem_567, getitem_568, getitem_569, getitem_570, getitem_571, getitem_572, getitem_573, getitem_574, getitem_575, getitem_576, getitem_577, getitem_578, getitem_579, getitem_580, getitem_581, 
getitem_582, getitem_583, getitem_584, getitem_585, getitem_586, getitem_587, getitem_588, getitem_589, getitem_590, getitem_591], [getitem_2664, getitem_2665, getitem_2666, getitem_2667, getitem_2668, getitem_2669, getitem_2670, getitem_2671, getitem_2672, getitem_2673, getitem_2674, getitem_2675, getitem_2676, getitem_2677, getitem_2678, getitem_2679, getitem_2680, getitem_2681, getitem_2682, getitem_2683, getitem_2684, getitem_2685, getitem_2686, getitem_2687, getitem_2688, getitem_2689, getitem_2690, getitem_2691, getitem_2692, getitem_2693, getitem_2694, getitem_2695, getitem_2696, getitem_2697, getitem_2698, getitem_2699, getitem_2700, getitem_2701, getitem_2702, getitem_2703, getitem_2704, getitem_2705, getitem_2706, getitem_2707, getitem_2708, getitem_2709, getitem_2710, getitem_2711, getitem_2712, getitem_2713, getitem_2714, getitem_2715, getitem_2716, getitem_2717, getitem_2718, getitem_2719, getitem_2720, getitem_2721, getitem_2722, getitem_2723, getitem_2724, getitem_2725, getitem_2726, getitem_2727, getitem_2728, getitem_2729, getitem_2730, getitem_2731, getitem_2732, getitem_2733, getitem_2734, getitem_2735, getitem_2736, getitem_2737, getitem_2738, getitem_2739, getitem_2740, getitem_2741, getitem_2742, getitem_2743, getitem_2744, getitem_2745, getitem_2746, getitem_2747, getitem_2748, getitem_2749, getitem_2750, getitem_2751, getitem_2752, getitem_2753, getitem_2754, getitem_2755, getitem_2756, getitem_2757, getitem_2758, getitem_2759, getitem_2760, getitem_2761, getitem_2762, getitem_2763, getitem_2764, getitem_2765, getitem_2766, getitem_2767, getitem_2768, getitem_2769, getitem_2770, getitem_2771, getitem_2772, getitem_2773, getitem_2774, getitem_2775, getitem_2776, getitem_2777, getitem_2778, getitem_2779, getitem_2780, getitem_2781, getitem_2782, getitem_2783, getitem_2784, getitem_2785, getitem_2786, getitem_2787, getitem_2788, getitem_2789, getitem_2790, getitem_2791, getitem_2792, getitem_2793, getitem_2794, getitem_2795, getitem_2796, getitem_2797, getitem_2798, getitem_2799, getitem_2800, getitem_2801, getitem_2802, getitem_2803, getitem_2804, getitem_2805, getitem_2806, getitem_2807, getitem_2808, getitem_2809, getitem_2810, getitem_2811]); getitem_2664 = getitem_2665 = getitem_2666 = getitem_2667 = getitem_2668 = getitem_2669 = getitem_2670 = getitem_2671 = getitem_2672 = getitem_2673 = getitem_2674 = getitem_2675 = getitem_2676 = getitem_2677 = getitem_2678 = getitem_2679 = getitem_2680 = getitem_2681 = getitem_2682 = getitem_2683 = getitem_2684 = getitem_2685 = getitem_2686 = getitem_2687 = getitem_2688 = getitem_2689 = getitem_2690 = getitem_2691 = getitem_2692 = getitem_2693 = getitem_2694 = getitem_2695 = getitem_2696 = getitem_2697 = getitem_2698 = getitem_2699 = getitem_2700 = getitem_2701 = getitem_2702 = getitem_2703 = getitem_2704 = getitem_2705 = getitem_2706 = getitem_2707 = getitem_2708 = getitem_2709 = getitem_2710 = getitem_2711 = getitem_2712 = getitem_2713 = getitem_2714 = getitem_2715 = getitem_2716 = getitem_2717 = getitem_2718 = getitem_2719 = getitem_2720 = getitem_2721 = getitem_2722 = getitem_2723 = getitem_2724 = getitem_2725 = getitem_2726 = getitem_2727 = getitem_2728 = getitem_2729 = getitem_2730 = getitem_2731 = getitem_2732 = getitem_2733 = getitem_2734 = getitem_2735 = getitem_2736 = getitem_2737 = getitem_2738 = getitem_2739 = getitem_2740 = getitem_2741 = getitem_2742 = getitem_2743 = getitem_2744 = getitem_2745 = getitem_2746 = getitem_2747 = getitem_2748 = getitem_2749 = getitem_2750 = getitem_2751 = getitem_2752 = getitem_2753 
= getitem_2754 = getitem_2755 = getitem_2756 = getitem_2757 = getitem_2758 = getitem_2759 = getitem_2760 = getitem_2761 = getitem_2762 = getitem_2763 = getitem_2764 = getitem_2765 = getitem_2766 = getitem_2767 = getitem_2768 = getitem_2769 = getitem_2770 = getitem_2771 = getitem_2772 = getitem_2773 = getitem_2774 = getitem_2775 = getitem_2776 = getitem_2777 = getitem_2778 = getitem_2779 = getitem_2780 = getitem_2781 = getitem_2782 = getitem_2783 = getitem_2784 = getitem_2785 = getitem_2786 = getitem_2787 = getitem_2788 = getitem_2789 = getitem_2790 = getitem_2791 = getitem_2792 = getitem_2793 = getitem_2794 = getitem_2795 = getitem_2796 = getitem_2797 = getitem_2798 = getitem_2799 = getitem_2800 = getitem_2801 = getitem_2802 = getitem_2803 = getitem_2804 = getitem_2805 = getitem_2806 = getitem_2807 = getitem_2808 = getitem_2809 = getitem_2810 = getitem_2811 = None\n getitem_2812 = _foreach_div_3[0]\n getitem_2813 = _foreach_div_3[1]\n getitem_2814 = _foreach_div_3[2]\n getitem_2815 = _foreach_div_3[3]\n getitem_2816 = _foreach_div_3[4]\n getitem_2817 = _foreach_div_3[5]\n getitem_2818 = _foreach_div_3[6]\n getitem_2819 = _foreach_div_3[7]\n getitem_2820 = _foreach_div_3[8]\n getitem_2821 = _foreach_div_3[9]\n getitem_2822 = _foreach_div_3[10]\n getitem_2823 = _foreach_div_3[11]\n getitem_2824 = _foreach_div_3[12]\n getitem_2825 = _foreach_div_3[13]\n getitem_2826 = _foreach_div_3[14]\n getitem_2827 = _foreach_div_3[15]\n getitem_2828 = _foreach_div_3[16]\n getitem_2829 = _foreach_div_3[17]\n getitem_2830 = _foreach_div_3[18]\n getitem_2831 = _foreach_div_3[19]\n getitem_2832 = _foreach_div_3[20]\n getitem_2833 = _foreach_div_3[21]\n getitem_2834 = _foreach_div_3[22]\n getitem_2835 = _foreach_div_3[23]\n getitem_2836 = _foreach_div_3[24]\n getitem_2837 = _foreach_div_3[25]\n getitem_2838 = _foreach_div_3[26]\n getitem_2839 = _foreach_div_3[27]\n getitem_2840 = _foreach_div_3[28]\n getitem_2841 = _foreach_div_3[29]\n getitem_2842 = _foreach_div_3[30]\n getitem_2843 = _foreach_div_3[31]\n getitem_2844 = _foreach_div_3[32]\n getitem_2845 = _foreach_div_3[33]\n getitem_2846 = _foreach_div_3[34]\n getitem_2847 = _foreach_div_3[35]\n getitem_2848 = _foreach_div_3[36]\n getitem_2849 = _foreach_div_3[37]\n getitem_2850 = _foreach_div_3[38]\n getitem_2851 = _foreach_div_3[39]\n getitem_2852 = _foreach_div_3[40]\n getitem_2853 = _foreach_div_3[41]\n getitem_2854 = _foreach_div_3[42]\n getitem_2855 = _foreach_div_3[43]\n getitem_2856 = _foreach_div_3[44]\n getitem_2857 = _foreach_div_3[45]\n getitem_2858 = _foreach_div_3[46]\n getitem_2859 = _foreach_div_3[47]\n getitem_2860 = _foreach_div_3[48]\n getitem_2861 = _foreach_div_3[49]\n getitem_2862 = _foreach_div_3[50]\n getitem_2863 = _foreach_div_3[51]\n getitem_2864 = _foreach_div_3[52]\n getitem_2865 = _foreach_div_3[53]\n getitem_2866 = _foreach_div_3[54]\n getitem_2867 = _foreach_div_3[55]\n getitem_2868 = _foreach_div_3[56]\n getitem_2869 = _foreach_div_3[57]\n getitem_2870 = _foreach_div_3[58]\n getitem_2871 = _foreach_div_3[59]\n getitem_2872 = _foreach_div_3[60]\n getitem_2873 = _foreach_div_3[61]\n getitem_2874 = _foreach_div_3[62]\n getitem_2875 = _foreach_div_3[63]\n getitem_2876 = _foreach_div_3[64]\n getitem_2877 = _foreach_div_3[65]\n getitem_2878 = _foreach_div_3[66]\n getitem_2879 = _foreach_div_3[67]\n getitem_2880 = _foreach_div_3[68]\n getitem_2881 = _foreach_div_3[69]\n getitem_2882 = _foreach_div_3[70]\n getitem_2883 = _foreach_div_3[71]\n getitem_2884 = _foreach_div_3[72]\n getitem_2885 = _foreach_div_3[73]\n getitem_2886 = 
_foreach_div_3[74]\n getitem_2887 = _foreach_div_3[75]\n getitem_2888 = _foreach_div_3[76]\n getitem_2889 = _foreach_div_3[77]\n getitem_2890 = _foreach_div_3[78]\n getitem_2891 = _foreach_div_3[79]\n getitem_2892 = _foreach_div_3[80]\n getitem_2893 = _foreach_div_3[81]\n getitem_2894 = _foreach_div_3[82]\n getitem_2895 = _foreach_div_3[83]\n getitem_2896 = _foreach_div_3[84]\n getitem_2897 = _foreach_div_3[85]\n getitem_2898 = _foreach_div_3[86]\n getitem_2899 = _foreach_div_3[87]\n getitem_2900 = _foreach_div_3[88]\n getitem_2901 = _foreach_div_3[89]\n getitem_2902 = _foreach_div_3[90]\n getitem_2903 = _foreach_div_3[91]\n getitem_2904 = _foreach_div_3[92]\n getitem_2905 = _foreach_div_3[93]\n getitem_2906 = _foreach_div_3[94]\n getitem_2907 = _foreach_div_3[95]\n getitem_2908 = _foreach_div_3[96]\n getitem_2909 = _foreach_div_3[97]\n getitem_2910 = _foreach_div_3[98]\n getitem_2911 = _foreach_div_3[99]\n getitem_2912 = _foreach_div_3[100]\n getitem_2913 = _foreach_div_3[101]\n getitem_2914 = _foreach_div_3[102]\n getitem_2915 = _foreach_div_3[103]\n getitem_2916 = _foreach_div_3[104]\n getitem_2917 = _foreach_div_3[105]\n getitem_2918 = _foreach_div_3[106]\n getitem_2919 = _foreach_div_3[107]\n getitem_2920 = _foreach_div_3[108]\n getitem_2921 = _foreach_div_3[109]\n getitem_2922 = _foreach_div_3[110]\n getitem_2923 = _foreach_div_3[111]\n getitem_2924 = _foreach_div_3[112]\n getitem_2925 = _foreach_div_3[113]\n getitem_2926 = _foreach_div_3[114]\n getitem_2927 = _foreach_div_3[115]\n getitem_2928 = _foreach_div_3[116]\n getitem_2929 = _foreach_div_3[117]\n getitem_2930 = _foreach_div_3[118]\n getitem_2931 = _foreach_div_3[119]\n getitem_2932 = _foreach_div_3[120]\n getitem_2933 = _foreach_div_3[121]\n getitem_2934 = _foreach_div_3[122]\n getitem_2935 = _foreach_div_3[123]\n getitem_2936 = _foreach_div_3[124]\n getitem_2937 = _foreach_div_3[125]\n getitem_2938 = _foreach_div_3[126]\n getitem_2939 = _foreach_div_3[127]\n getitem_2940 = _foreach_div_3[128]\n getitem_2941 = _foreach_div_3[129]\n getitem_2942 = _foreach_div_3[130]\n getitem_2943 = _foreach_div_3[131]\n getitem_2944 = _foreach_div_3[132]\n getitem_2945 = _foreach_div_3[133]\n getitem_2946 = _foreach_div_3[134]\n getitem_2947 = _foreach_div_3[135]\n getitem_2948 = _foreach_div_3[136]\n getitem_2949 = _foreach_div_3[137]\n getitem_2950 = _foreach_div_3[138]\n getitem_2951 = _foreach_div_3[139]\n getitem_2952 = _foreach_div_3[140]\n getitem_2953 = _foreach_div_3[141]\n getitem_2954 = _foreach_div_3[142]\n getitem_2955 = _foreach_div_3[143]\n getitem_2956 = _foreach_div_3[144]\n getitem_2957 = _foreach_div_3[145]\n getitem_2958 = _foreach_div_3[146]\n getitem_2959 = _foreach_div_3[147]; _foreach_div_3 = None\n _foreach_add_4 = torch.ops.aten._foreach_add.List([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, arg11_1, arg12_1, arg13_1, arg14_1, arg15_1, arg16_1, arg17_1, arg18_1, arg19_1, arg20_1, arg21_1, arg22_1, arg23_1, arg24_1, arg25_1, arg26_1, arg27_1, arg28_1, arg29_1, arg30_1, arg31_1, arg32_1, arg33_1, arg34_1, arg35_1, arg36_1, arg37_1, arg38_1, arg39_1, arg40_1, arg41_1, arg42_1, arg43_1, arg44_1, arg45_1, arg46_1, arg47_1, arg48_1, arg49_1, arg50_1, arg51_1, arg52_1, arg53_1, arg54_1, arg55_1, arg56_1, arg57_1, arg58_1, arg59_1, arg60_1, arg61_1, arg62_1, arg63_1, arg64_1, arg65_1, arg66_1, arg67_1, arg68_1, arg69_1, arg70_1, arg71_1, arg72_1, arg73_1, arg74_1, arg75_1, arg76_1, arg77_1, arg78_1, arg79_1, arg80_1, arg81_1, arg82_1, arg83_1, arg84_1, arg85_1, arg86_1, arg87_1, 
arg88_1, arg89_1, arg90_1, arg91_1, arg92_1, arg93_1, arg94_1, arg95_1, arg96_1, arg97_1, arg98_1, arg99_1, arg100_1, arg101_1, arg102_1, arg103_1, arg104_1, arg105_1, arg106_1, arg107_1, arg108_1, arg109_1, arg110_1, arg111_1, arg112_1, arg113_1, arg114_1, arg115_1, arg116_1, arg117_1, arg118_1, arg119_1, arg120_1, arg121_1, arg122_1, arg123_1, arg124_1, arg125_1, arg126_1, arg127_1, arg128_1, arg129_1, arg130_1, arg131_1, arg132_1, arg133_1, arg134_1, arg135_1, arg136_1, arg137_1, arg138_1, arg139_1, arg140_1, arg141_1, arg142_1, arg143_1, arg144_1, arg145_1, arg146_1, arg147_1], [getitem_2812, getitem_2813, getitem_2814, getitem_2815, getitem_2816, getitem_2817, getitem_2818, getitem_2819, getitem_2820, getitem_2821, getitem_2822, getitem_2823, getitem_2824, getitem_2825, getitem_2826, getitem_2827, getitem_2828, getitem_2829, getitem_2830, getitem_2831, getitem_2832, getitem_2833, getitem_2834, getitem_2835, getitem_2836, getitem_2837, getitem_2838, getitem_2839, getitem_2840, getitem_2841, getitem_2842, getitem_2843, getitem_2844, getitem_2845, getitem_2846, getitem_2847, getitem_2848, getitem_2849, getitem_2850, getitem_2851, getitem_2852, getitem_2853, getitem_2854, getitem_2855, getitem_2856, getitem_2857, getitem_2858, getitem_2859, getitem_2860, getitem_2861, getitem_2862, getitem_2863, getitem_2864, getitem_2865, getitem_2866, getitem_2867, getitem_2868, getitem_2869, getitem_2870, getitem_2871, getitem_2872, getitem_2873, getitem_2874, getitem_2875, getitem_2876, getitem_2877, getitem_2878, getitem_2879, getitem_2880, getitem_2881, getitem_2882, getitem_2883, getitem_2884, getitem_2885, getitem_2886, getitem_2887, getitem_2888, getitem_2889, getitem_2890, getitem_2891, getitem_2892, getitem_2893, getitem_2894, getitem_2895, getitem_2896, getitem_2897, getitem_2898, getitem_2899, getitem_2900, getitem_2901, getitem_2902, getitem_2903, getitem_2904, getitem_2905, getitem_2906, getitem_2907, getitem_2908, getitem_2909, getitem_2910, getitem_2911, getitem_2912, getitem_2913, getitem_2914, getitem_2915, getitem_2916, getitem_2917, getitem_2918, getitem_2919, getitem_2920, getitem_2921, getitem_2922, getitem_2923, getitem_2924, getitem_2925, getitem_2926, getitem_2927, getitem_2928, getitem_2929, getitem_2930, getitem_2931, getitem_2932, getitem_2933, getitem_2934, getitem_2935, getitem_2936, getitem_2937, getitem_2938, getitem_2939, getitem_2940, getitem_2941, getitem_2942, getitem_2943, getitem_2944, getitem_2945, getitem_2946, getitem_2947, getitem_2948, getitem_2949, getitem_2950, getitem_2951, getitem_2952, getitem_2953, getitem_2954, getitem_2955, getitem_2956, getitem_2957, getitem_2958, getitem_2959]); getitem_2812 = getitem_2813 = getitem_2814 = getitem_2815 = getitem_2816 = getitem_2817 = getitem_2818 = getitem_2819 = getitem_2820 = getitem_2821 = getitem_2822 = getitem_2823 = getitem_2824 = getitem_2825 = getitem_2826 = getitem_2827 = getitem_2828 = getitem_2829 = getitem_2830 = getitem_2831 = getitem_2832 = getitem_2833 = getitem_2834 = getitem_2835 = getitem_2836 = getitem_2837 = getitem_2838 = getitem_2839 = getitem_2840 = getitem_2841 = getitem_2842 = getitem_2843 = getitem_2844 = getitem_2845 = getitem_2846 = getitem_2847 = getitem_2848 = getitem_2849 = getitem_2850 = getitem_2851 = getitem_2852 = getitem_2853 = getitem_2854 = getitem_2855 = getitem_2856 = getitem_2857 = getitem_2858 = getitem_2859 = getitem_2860 = getitem_2861 = getitem_2862 = getitem_2863 = getitem_2864 = getitem_2865 = getitem_2866 = getitem_2867 = getitem_2868 = getitem_2869 = getitem_2870 = 
getitem_2871 = getitem_2872 = getitem_2873 = getitem_2874 = getitem_2875 = getitem_2876 = getitem_2877 = getitem_2878 = getitem_2879 = getitem_2880 = getitem_2881 = getitem_2882 = getitem_2883 = getitem_2884 = getitem_2885 = getitem_2886 = getitem_2887 = getitem_2888 = getitem_2889 = getitem_2890 = getitem_2891 = getitem_2892 = getitem_2893 = getitem_2894 = getitem_2895 = getitem_2896 = getitem_2897 = getitem_2898 = getitem_2899 = getitem_2900 = getitem_2901 = getitem_2902 = getitem_2903 = getitem_2904 = getitem_2905 = getitem_2906 = getitem_2907 = getitem_2908 = getitem_2909 = getitem_2910 = getitem_2911 = getitem_2912 = getitem_2913 = getitem_2914 = getitem_2915 = getitem_2916 = getitem_2917 = getitem_2918 = getitem_2919 = getitem_2920 = getitem_2921 = getitem_2922 = getitem_2923 = getitem_2924 = getitem_2925 = getitem_2926 = getitem_2927 = getitem_2928 = getitem_2929 = getitem_2930 = getitem_2931 = getitem_2932 = getitem_2933 = getitem_2934 = getitem_2935 = getitem_2936 = getitem_2937 = getitem_2938 = getitem_2939 = getitem_2940 = getitem_2941 = getitem_2942 = getitem_2943 = getitem_2944 = getitem_2945 = getitem_2946 = getitem_2947 = getitem_2948 = getitem_2949 = getitem_2950 = getitem_2951 = getitem_2952 = getitem_2953 = getitem_2954 = getitem_2955 = getitem_2956 = getitem_2957 = getitem_2958 = getitem_2959 = None\n getitem_2960 = _foreach_add_4[0]\n getitem_2961 = _foreach_add_4[1]\n getitem_2962 = _foreach_add_4[2]\n getitem_2963 = _foreach_add_4[3]\n getitem_2964 = _foreach_add_4[4]\n getitem_2965 = _foreach_add_4[5]\n getitem_2966 = _foreach_add_4[6]\n getitem_2967 = _foreach_add_4[7]\n getitem_2968 = _foreach_add_4[8]\n getitem_2969 = _foreach_add_4[9]\n getitem_2970 = _foreach_add_4[10]\n getitem_2971 = _foreach_add_4[11]\n getitem_2972 = _foreach_add_4[12]\n getitem_2973 = _foreach_add_4[13]\n getitem_2974 = _foreach_add_4[14]\n getitem_2975 = _foreach_add_4[15]\n getitem_2976 = _foreach_add_4[16]\n getitem_2977 = _foreach_add_4[17]\n getitem_2978 = _foreach_add_4[18]\n getitem_2979 = _foreach_add_4[19]\n getitem_2980 = _foreach_add_4[20]\n getitem_2981 = _foreach_add_4[21]\n getitem_2982 = _foreach_add_4[22]\n getitem_2983 = _foreach_add_4[23]\n getitem_2984 = _foreach_add_4[24]\n getitem_2985 = _foreach_add_4[25]\n getitem_2986 = _foreach_add_4[26]\n getitem_2987 = _foreach_add_4[27]\n getitem_2988 = _foreach_add_4[28]\n getitem_2989 = _foreach_add_4[29]\n getitem_2990 = _foreach_add_4[30]\n getitem_2991 = _foreach_add_4[31]\n getitem_2992 = _foreach_add_4[32]\n getitem_2993 = _foreach_add_4[33]\n getitem_2994 = _foreach_add_4[34]\n getitem_2995 = _foreach_add_4[35]\n getitem_2996 = _foreach_add_4[36]\n getitem_2997 = _foreach_add_4[37]\n getitem_2998 = _foreach_add_4[38]\n getitem_2999 = _foreach_add_4[39]\n getitem_3000 = _foreach_add_4[40]\n getitem_3001 = _foreach_add_4[41]\n getitem_3002 = _foreach_add_4[42]\n getitem_3003 = _foreach_add_4[43]\n getitem_3004 = _foreach_add_4[44]\n getitem_3005 = _foreach_add_4[45]\n getitem_3006 = _foreach_add_4[46]\n getitem_3007 = _foreach_add_4[47]\n getitem_3008 = _foreach_add_4[48]\n getitem_3009 = _foreach_add_4[49]\n getitem_3010 = _foreach_add_4[50]\n getitem_3011 = _foreach_add_4[51]\n getitem_3012 = _foreach_add_4[52]\n getitem_3013 = _foreach_add_4[53]\n getitem_3014 = _foreach_add_4[54]\n getitem_3015 = _foreach_add_4[55]\n getitem_3016 = _foreach_add_4[56]\n getitem_3017 = _foreach_add_4[57]\n getitem_3018 = _foreach_add_4[58]\n getitem_3019 = _foreach_add_4[59]\n getitem_3020 = _foreach_add_4[60]\n getitem_3021 = 
_foreach_add_4[61]\n getitem_3022 = _foreach_add_4[62]\n getitem_3023 = _foreach_add_4[63]\n getitem_3024 = _foreach_add_4[64]\n getitem_3025 = _foreach_add_4[65]\n getitem_3026 = _foreach_add_4[66]\n getitem_3027 = _foreach_add_4[67]\n getitem_3028 = _foreach_add_4[68]\n getitem_3029 = _foreach_add_4[69]\n getitem_3030 = _foreach_add_4[70]\n getitem_3031 = _foreach_add_4[71]\n getitem_3032 = _foreach_add_4[72]\n getitem_3033 = _foreach_add_4[73]\n getitem_3034 = _foreach_add_4[74]\n getitem_3035 = _foreach_add_4[75]\n getitem_3036 = _foreach_add_4[76]\n getitem_3037 = _foreach_add_4[77]\n getitem_3038 = _foreach_add_4[78]\n getitem_3039 = _foreach_add_4[79]\n getitem_3040 = _foreach_add_4[80]\n getitem_3041 = _foreach_add_4[81]\n getitem_3042 = _foreach_add_4[82]\n getitem_3043 = _foreach_add_4[83]\n getitem_3044 = _foreach_add_4[84]\n getitem_3045 = _foreach_add_4[85]\n getitem_3046 = _foreach_add_4[86]\n getitem_3047 = _foreach_add_4[87]\n getitem_3048 = _foreach_add_4[88]\n getitem_3049 = _foreach_add_4[89]\n getitem_3050 = _foreach_add_4[90]\n getitem_3051 = _foreach_add_4[91]\n getitem_3052 = _foreach_add_4[92]\n getitem_3053 = _foreach_add_4[93]\n getitem_3054 = _foreach_add_4[94]\n getitem_3055 = _foreach_add_4[95]\n getitem_3056 = _foreach_add_4[96]\n getitem_3057 = _foreach_add_4[97]\n getitem_3058 = _foreach_add_4[98]\n getitem_3059 = _foreach_add_4[99]\n getitem_3060 = _foreach_add_4[100]\n getitem_3061 = _foreach_add_4[101]\n getitem_3062 = _foreach_add_4[102]\n getitem_3063 = _foreach_add_4[103]\n getitem_3064 = _foreach_add_4[104]\n getitem_3065 = _foreach_add_4[105]\n getitem_3066 = _foreach_add_4[106]\n getitem_3067 = _foreach_add_4[107]\n getitem_3068 = _foreach_add_4[108]\n getitem_3069 = _foreach_add_4[109]\n getitem_3070 = _foreach_add_4[110]\n getitem_3071 = _foreach_add_4[111]\n getitem_3072 = _foreach_add_4[112]\n getitem_3073 = _foreach_add_4[113]\n getitem_3074 = _foreach_add_4[114]\n getitem_3075 = _foreach_add_4[115]\n getitem_3076 = _foreach_add_4[116]\n getitem_3077 = _foreach_add_4[117]\n getitem_3078 = _foreach_add_4[118]\n getitem_3079 = _foreach_add_4[119]\n getitem_3080 = _foreach_add_4[120]\n getitem_3081 = _foreach_add_4[121]\n getitem_3082 = _foreach_add_4[122]\n getitem_3083 = _foreach_add_4[123]\n getitem_3084 = _foreach_add_4[124]\n getitem_3085 = _foreach_add_4[125]\n getitem_3086 = _foreach_add_4[126]\n getitem_3087 = _foreach_add_4[127]\n getitem_3088 = _foreach_add_4[128]\n getitem_3089 = _foreach_add_4[129]\n getitem_3090 = _foreach_add_4[130]\n getitem_3091 = _foreach_add_4[131]\n getitem_3092 = _foreach_add_4[132]\n getitem_3093 = _foreach_add_4[133]\n getitem_3094 = _foreach_add_4[134]\n getitem_3095 = _foreach_add_4[135]\n getitem_3096 = _foreach_add_4[136]\n getitem_3097 = _foreach_add_4[137]\n getitem_3098 = _foreach_add_4[138]\n getitem_3099 = _foreach_add_4[139]\n getitem_3100 = _foreach_add_4[140]\n getitem_3101 = _foreach_add_4[141]\n getitem_3102 = _foreach_add_4[142]\n getitem_3103 = _foreach_add_4[143]\n getitem_3104 = _foreach_add_4[144]\n getitem_3105 = _foreach_add_4[145]\n getitem_3106 = _foreach_add_4[146]\n getitem_3107 = _foreach_add_4[147]; _foreach_add_4 = None\n copy_ = torch.ops.aten.copy_.default(arg0_1, getitem_2960); arg0_1 = getitem_2960 = copy_ = None\n copy__1 = torch.ops.aten.copy_.default(arg1_1, getitem_2961); arg1_1 = getitem_2961 = copy__1 = None\n copy__2 = torch.ops.aten.copy_.default(arg2_1, getitem_2962); arg2_1 = getitem_2962 = copy__2 = None\n copy__3 = torch.ops.aten.copy_.default(arg3_1, getitem_2963); 
arg3_1 = getitem_2963 = copy__3 = None\n copy__4 = torch.ops.aten.copy_.default(arg4_1, getitem_2964); arg4_1 = getitem_2964 = copy__4 = None\n copy__5 = torch.ops.aten.copy_.default(arg5_1, getitem_2965); arg5_1 = getitem_2965 = copy__5 = None\n copy__6 = torch.ops.aten.copy_.default(arg6_1, getitem_2966); arg6_1 = getitem_2966 = copy__6 = None\n copy__7 = torch.ops.aten.copy_.default(arg7_1, getitem_2967); arg7_1 = getitem_2967 = copy__7 = None\n copy__8 = torch.ops.aten.copy_.default(arg8_1, getitem_2968); arg8_1 = getitem_2968 = copy__8 = None\n copy__9 = torch.ops.aten.copy_.default(arg9_1, getitem_2969); arg9_1 = getitem_2969 = copy__9 = None\n copy__10 = torch.ops.aten.copy_.default(arg10_1, getitem_2970); arg10_1 = getitem_2970 = copy__10 = None\n copy__11 = torch.ops.aten.copy_.default(arg11_1, getitem_2971); arg11_1 = getitem_2971 = copy__11 = None\n copy__12 = torch.ops.aten.copy_.default(arg12_1, getitem_2972); arg12_1 = getitem_2972 = copy__12 = None\n copy__13 = torch.ops.aten.copy_.default(arg13_1, getitem_2973); arg13_1 = getitem_2973 = copy__13 = None\n copy__14 = torch.ops.aten.copy_.default(arg14_1, getitem_2974); arg14_1 = getitem_2974 = copy__14 = None\n copy__15 = torch.ops.aten.copy_.default(arg15_1, getitem_2975); arg15_1 = getitem_2975 = copy__15 = None\n copy__16 = torch.ops.aten.copy_.default(arg16_1, getitem_2976); arg16_1 = getitem_2976 = copy__16 = None\n copy__17 = torch.ops.aten.copy_.default(arg17_1, getitem_2977); arg17_1 = getitem_2977 = copy__17 = None\n copy__18 = torch.ops.aten.copy_.default(arg18_1, getitem_2978); arg18_1 = getitem_2978 = copy__18 = None\n copy__19 = torch.ops.aten.copy_.default(arg19_1, getitem_2979); arg19_1 = getitem_2979 = copy__19 = None\n copy__20 = torch.ops.aten.copy_.default(arg20_1, getitem_2980); arg20_1 = getitem_2980 = copy__20 = None\n copy__21 = torch.ops.aten.copy_.default(arg21_1, getitem_2981); arg21_1 = getitem_2981 = copy__21 = None\n copy__22 = torch.ops.aten.copy_.default(arg22_1, getitem_2982); arg22_1 = getitem_2982 = copy__22 = None\n copy__23 = torch.ops.aten.copy_.default(arg23_1, getitem_2983); arg23_1 = getitem_2983 = copy__23 = None\n copy__24 = torch.ops.aten.copy_.default(arg24_1, getitem_2984); arg24_1 = getitem_2984 = copy__24 = None\n copy__25 = torch.ops.aten.copy_.default(arg25_1, getitem_2985); arg25_1 = getitem_2985 = copy__25 = None\n copy__26 = torch.ops.aten.copy_.default(arg26_1, getitem_2986); arg26_1 = getitem_2986 = copy__26 = None\n copy__27 = torch.ops.aten.copy_.default(arg27_1, getitem_2987); arg27_1 = getitem_2987 = copy__27 = None\n copy__28 = torch.ops.aten.copy_.default(arg28_1, getitem_2988); arg28_1 = getitem_2988 = copy__28 = None\n copy__29 = torch.ops.aten.copy_.default(arg29_1, getitem_2989); arg29_1 = getitem_2989 = copy__29 = None\n copy__30 = torch.ops.aten.copy_.default(arg30_1, getitem_2990); arg30_1 = getitem_2990 = copy__30 = None\n copy__31 = torch.ops.aten.copy_.default(arg31_1, getitem_2991); arg31_1 = getitem_2991 = copy__31 = None\n copy__32 = torch.ops.aten.copy_.default(arg32_1, getitem_2992); arg32_1 = getitem_2992 = copy__32 = None\n copy__33 = torch.ops.aten.copy_.default(arg33_1, getitem_2993); arg33_1 = getitem_2993 = copy__33 = None\n copy__34 = torch.ops.aten.copy_.default(arg34_1, getitem_2994); arg34_1 = getitem_2994 = copy__34 = None\n copy__35 = torch.ops.aten.copy_.default(arg35_1, getitem_2995); arg35_1 = getitem_2995 = copy__35 = None\n copy__36 = torch.ops.aten.copy_.default(arg36_1, getitem_2996); arg36_1 = getitem_2996 = copy__36 = None\n 
copy__37 = torch.ops.aten.copy_.default(arg37_1, getitem_2997); arg37_1 = getitem_2997 = copy__37 = None\n copy__38 = torch.ops.aten.copy_.default(arg38_1, getitem_2998); arg38_1 = getitem_2998 = copy__38 = None\n copy__39 = torch.ops.aten.copy_.default(arg39_1, getitem_2999); arg39_1 = getitem_2999 = copy__39 = None\n copy__40 = torch.ops.aten.copy_.default(arg40_1, getitem_3000); arg40_1 = getitem_3000 = copy__40 = None\n copy__41 = torch.ops.aten.copy_.default(arg41_1, getitem_3001); arg41_1 = getitem_3001 = copy__41 = None\n copy__42 = torch.ops.aten.copy_.default(arg42_1, getitem_3002); arg42_1 = getitem_3002 = copy__42 = None\n copy__43 = torch.ops.aten.copy_.default(arg43_1, getitem_3003); arg43_1 = getitem_3003 = copy__43 = None\n copy__44 = torch.ops.aten.copy_.default(arg44_1, getitem_3004); arg44_1 = getitem_3004 = copy__44 = None\n copy__45 = torch.ops.aten.copy_.default(arg45_1, getitem_3005); arg45_1 = getitem_3005 = copy__45 = None\n copy__46 = torch.ops.aten.copy_.default(arg46_1, getitem_3006); arg46_1 = getitem_3006 = copy__46 = None\n copy__47 = torch.ops.aten.copy_.default(arg47_1, getitem_3007); arg47_1 = getitem_3007 = copy__47 = None\n copy__48 = torch.ops.aten.copy_.default(arg48_1, getitem_3008); arg48_1 = getitem_3008 = copy__48 = None\n copy__49 = torch.ops.aten.copy_.default(arg49_1, getitem_3009); arg49_1 = getitem_3009 = copy__49 = None\n copy__50 = torch.ops.aten.copy_.default(arg50_1, getitem_3010); arg50_1 = getitem_3010 = copy__50 = None\n copy__51 = torch.ops.aten.copy_.default(arg51_1, getitem_3011); arg51_1 = getitem_3011 = copy__51 = None\n copy__52 = torch.ops.aten.copy_.default(arg52_1, getitem_3012); arg52_1 = getitem_3012 = copy__52 = None\n copy__53 = torch.ops.aten.copy_.default(arg53_1, getitem_3013); arg53_1 = getitem_3013 = copy__53 = None\n copy__54 = torch.ops.aten.copy_.default(arg54_1, getitem_3014); arg54_1 = getitem_3014 = copy__54 = None\n copy__55 = torch.ops.aten.copy_.default(arg55_1, getitem_3015); arg55_1 = getitem_3015 = copy__55 = None\n copy__56 = torch.ops.aten.copy_.default(arg56_1, getitem_3016); arg56_1 = getitem_3016 = copy__56 = None\n copy__57 = torch.ops.aten.copy_.default(arg57_1, getitem_3017); arg57_1 = getitem_3017 = copy__57 = None\n copy__58 = torch.ops.aten.copy_.default(arg58_1, getitem_3018); arg58_1 = getitem_3018 = copy__58 = None\n copy__59 = torch.ops.aten.copy_.default(arg59_1, getitem_3019); arg59_1 = getitem_3019 = copy__59 = None\n copy__60 = torch.ops.aten.copy_.default(arg60_1, getitem_3020); arg60_1 = getitem_3020 = copy__60 = None\n copy__61 = torch.ops.aten.copy_.default(arg61_1, getitem_3021); arg61_1 = getitem_3021 = copy__61 = None\n copy__62 = torch.ops.aten.copy_.default(arg62_1, getitem_3022); arg62_1 = getitem_3022 = copy__62 = None\n copy__63 = torch.ops.aten.copy_.default(arg63_1, getitem_3023); arg63_1 = getitem_3023 = copy__63 = None\n copy__64 = torch.ops.aten.copy_.default(arg64_1, getitem_3024); arg64_1 = getitem_3024 = copy__64 = None\n copy__65 = torch.ops.aten.copy_.default(arg65_1, getitem_3025); arg65_1 = getitem_3025 = copy__65 = None\n copy__66 = torch.ops.aten.copy_.default(arg66_1, getitem_3026); arg66_1 = getitem_3026 = copy__66 = None\n copy__67 = torch.ops.aten.copy_.default(arg67_1, getitem_3027); arg67_1 = getitem_3027 = copy__67 = None\n copy__68 = torch.ops.aten.copy_.default(arg68_1, getitem_3028); arg68_1 = getitem_3028 = copy__68 = None\n copy__69 = torch.ops.aten.copy_.default(arg69_1, getitem_3029); arg69_1 = getitem_3029 = copy__69 = None\n copy__70 = 
torch.ops.aten.copy_.default(arg70_1, getitem_3030); arg70_1 = getitem_3030 = copy__70 = None\n copy__71 = torch.ops.aten.copy_.default(arg71_1, getitem_3031); arg71_1 = getitem_3031 = copy__71 = None\n copy__72 = torch.ops.aten.copy_.default(arg72_1, getitem_3032); arg72_1 = getitem_3032 = copy__72 = None\n copy__73 = torch.ops.aten.copy_.default(arg73_1, getitem_3033); arg73_1 = getitem_3033 = copy__73 = None\n copy__74 = torch.ops.aten.copy_.default(arg74_1, getitem_3034); arg74_1 = getitem_3034 = copy__74 = None\n copy__75 = torch.ops.aten.copy_.default(arg75_1, getitem_3035); arg75_1 = getitem_3035 = copy__75 = None\n copy__76 = torch.ops.aten.copy_.default(arg76_1, getitem_3036); arg76_1 = getitem_3036 = copy__76 = None\n copy__77 = torch.ops.aten.copy_.default(arg77_1, getitem_3037); arg77_1 = getitem_3037 = copy__77 = None\n copy__78 = torch.ops.aten.copy_.default(arg78_1, getitem_3038); arg78_1 = getitem_3038 = copy__78 = None\n copy__79 = torch.ops.aten.copy_.default(arg79_1, getitem_3039); arg79_1 = getitem_3039 = copy__79 = None\n copy__80 = torch.ops.aten.copy_.default(arg80_1, getitem_3040); arg80_1 = getitem_3040 = copy__80 = None\n copy__81 = torch.ops.aten.copy_.default(arg81_1, getitem_3041); arg81_1 = getitem_3041 = copy__81 = None\n copy__82 = torch.ops.aten.copy_.default(arg82_1, getitem_3042); arg82_1 = getitem_3042 = copy__82 = None\n copy__83 = torch.ops.aten.copy_.default(arg83_1, getitem_3043); arg83_1 = getitem_3043 = copy__83 = None\n copy__84 = torch.ops.aten.copy_.default(arg84_1, getitem_3044); arg84_1 = getitem_3044 = copy__84 = None\n copy__85 = torch.ops.aten.copy_.default(arg85_1, getitem_3045); arg85_1 = getitem_3045 = copy__85 = None\n copy__86 = torch.ops.aten.copy_.default(arg86_1, getitem_3046); arg86_1 = getitem_3046 = copy__86 = None\n copy__87 = torch.ops.aten.copy_.default(arg87_1, getitem_3047); arg87_1 = getitem_3047 = copy__87 = None\n copy__88 = torch.ops.aten.copy_.default(arg88_1, getitem_3048); arg88_1 = getitem_3048 = copy__88 = None\n copy__89 = torch.ops.aten.copy_.default(arg89_1, getitem_3049); arg89_1 = getitem_3049 = copy__89 = None\n copy__90 = torch.ops.aten.copy_.default(arg90_1, getitem_3050); arg90_1 = getitem_3050 = copy__90 = None\n copy__91 = torch.ops.aten.copy_.default(arg91_1, getitem_3051); arg91_1 = getitem_3051 = copy__91 = None\n copy__92 = torch.ops.aten.copy_.default(arg92_1, getitem_3052); arg92_1 = getitem_3052 = copy__92 = None\n copy__93 = torch.ops.aten.copy_.default(arg93_1, getitem_3053); arg93_1 = getitem_3053 = copy__93 = None\n copy__94 = torch.ops.aten.copy_.default(arg94_1, getitem_3054); arg94_1 = getitem_3054 = copy__94 = None\n copy__95 = torch.ops.aten.copy_.default(arg95_1, getitem_3055); arg95_1 = getitem_3055 = copy__95 = None\n copy__96 = torch.ops.aten.copy_.default(arg96_1, getitem_3056); arg96_1 = getitem_3056 = copy__96 = None\n copy__97 = torch.ops.aten.copy_.default(arg97_1, getitem_3057); arg97_1 = getitem_3057 = copy__97 = None\n copy__98 = torch.ops.aten.copy_.default(arg98_1, getitem_3058); arg98_1 = getitem_3058 = copy__98 = None\n copy__99 = torch.ops.aten.copy_.default(arg99_1, getitem_3059); arg99_1 = getitem_3059 = copy__99 = None\n copy__100 = torch.ops.aten.copy_.default(arg100_1, getitem_3060); arg100_1 = getitem_3060 = copy__100 = None\n copy__101 = torch.ops.aten.copy_.default(arg101_1, getitem_3061); arg101_1 = getitem_3061 = copy__101 = None\n copy__102 = torch.ops.aten.copy_.default(arg102_1, getitem_3062); arg102_1 = getitem_3062 = copy__102 = None\n copy__103 = 
torch.ops.aten.copy_.default(arg103_1, getitem_3063); arg103_1 = getitem_3063 = copy__103 = None\n copy__104 = torch.ops.aten.copy_.default(arg104_1, getitem_3064); arg104_1 = getitem_3064 = copy__104 = None\n copy__105 = torch.ops.aten.copy_.default(arg105_1, getitem_3065); arg105_1 = getitem_3065 = copy__105 = None\n copy__106 = torch.ops.aten.copy_.default(arg106_1, getitem_3066); arg106_1 = getitem_3066 = copy__106 = None\n copy__107 = torch.ops.aten.copy_.default(arg107_1, getitem_3067); arg107_1 = getitem_3067 = copy__107 = None\n copy__108 = torch.ops.aten.copy_.default(arg108_1, getitem_3068); arg108_1 = getitem_3068 = copy__108 = None\n copy__109 = torch.ops.aten.copy_.default(arg109_1, getitem_3069); arg109_1 = getitem_3069 = copy__109 = None\n copy__110 = torch.ops.aten.copy_.default(arg110_1, getitem_3070); arg110_1 = getitem_3070 = copy__110 = None\n copy__111 = torch.ops.aten.copy_.default(arg111_1, getitem_3071); arg111_1 = getitem_3071 = copy__111 = None\n copy__112 = torch.ops.aten.copy_.default(arg112_1, getitem_3072); arg112_1 = getitem_3072 = copy__112 = None\n copy__113 = torch.ops.aten.copy_.default(arg113_1, getitem_3073); arg113_1 = getitem_3073 = copy__113 = None\n copy__114 = torch.ops.aten.copy_.default(arg114_1, getitem_3074); arg114_1 = getitem_3074 = copy__114 = None\n copy__115 = torch.ops.aten.copy_.default(arg115_1, getitem_3075); arg115_1 = getitem_3075 = copy__115 = None\n copy__116 = torch.ops.aten.copy_.default(arg116_1, getitem_3076); arg116_1 = getitem_3076 = copy__116 = None\n copy__117 = torch.ops.aten.copy_.default(arg117_1, getitem_3077); arg117_1 = getitem_3077 = copy__117 = None\n copy__118 = torch.ops.aten.copy_.default(arg118_1, getitem_3078); arg118_1 = getitem_3078 = copy__118 = None\n copy__119 = torch.ops.aten.copy_.default(arg119_1, getitem_3079); arg119_1 = getitem_3079 = copy__119 = None\n copy__120 = torch.ops.aten.copy_.default(arg120_1, getitem_3080); arg120_1 = getitem_3080 = copy__120 = None\n copy__121 = torch.ops.aten.copy_.default(arg121_1, getitem_3081); arg121_1 = getitem_3081 = copy__121 = None\n copy__122 = torch.ops.aten.copy_.default(arg122_1, getitem_3082); arg122_1 = getitem_3082 = copy__122 = None\n copy__123 = torch.ops.aten.copy_.default(arg123_1, getitem_3083); arg123_1 = getitem_3083 = copy__123 = None\n copy__124 = torch.ops.aten.copy_.default(arg124_1, getitem_3084); arg124_1 = getitem_3084 = copy__124 = None\n copy__125 = torch.ops.aten.copy_.default(arg125_1, getitem_3085); arg125_1 = getitem_3085 = copy__125 = None\n copy__126 = torch.ops.aten.copy_.default(arg126_1, getitem_3086); arg126_1 = getitem_3086 = copy__126 = None\n copy__127 = torch.ops.aten.copy_.default(arg127_1, getitem_3087); arg127_1 = getitem_3087 = copy__127 = None\n copy__128 = torch.ops.aten.copy_.default(arg128_1, getitem_3088); arg128_1 = getitem_3088 = copy__128 = None\n copy__129 = torch.ops.aten.copy_.default(arg129_1, getitem_3089); arg129_1 = getitem_3089 = copy__129 = None\n copy__130 = torch.ops.aten.copy_.default(arg130_1, getitem_3090); arg130_1 = getitem_3090 = copy__130 = None\n copy__131 = torch.ops.aten.copy_.default(arg131_1, getitem_3091); arg131_1 = getitem_3091 = copy__131 = None\n copy__132 = torch.ops.aten.copy_.default(arg132_1, getitem_3092); arg132_1 = getitem_3092 = copy__132 = None\n copy__133 = torch.ops.aten.copy_.default(arg133_1, getitem_3093); arg133_1 = getitem_3093 = copy__133 = None\n copy__134 = torch.ops.aten.copy_.default(arg134_1, getitem_3094); arg134_1 = getitem_3094 = copy__134 = None\n copy__135 = 
torch.ops.aten.copy_.default(arg135_1, getitem_3095); arg135_1 = getitem_3095 = copy__135 = None\n copy__136 = torch.ops.aten.copy_.default(arg136_1, getitem_3096); arg136_1 = getitem_3096 = copy__136 = None\n copy__137 = torch.ops.aten.copy_.default(arg137_1, getitem_3097); arg137_1 = getitem_3097 = copy__137 = None\n copy__138 = torch.ops.aten.copy_.default(arg138_1, getitem_3098); arg138_1 = getitem_3098 = copy__138 = None\n copy__139 = torch.ops.aten.copy_.default(arg139_1, getitem_3099); arg139_1 = getitem_3099 = copy__139 = None\n copy__140 = torch.ops.aten.copy_.default(arg140_1, getitem_3100); arg140_1 = getitem_3100 = copy__140 = None\n copy__141 = torch.ops.aten.copy_.default(arg141_1, getitem_3101); arg141_1 = getitem_3101 = copy__141 = None\n copy__142 = torch.ops.aten.copy_.default(arg142_1, getitem_3102); arg142_1 = getitem_3102 = copy__142 = None\n copy__143 = torch.ops.aten.copy_.default(arg143_1, getitem_3103); arg143_1 = getitem_3103 = copy__143 = None\n copy__144 = torch.ops.aten.copy_.default(arg144_1, getitem_3104); arg144_1 = getitem_3104 = copy__144 = None\n copy__145 = torch.ops.aten.copy_.default(arg145_1, getitem_3105); arg145_1 = getitem_3105 = copy__145 = None\n copy__146 = torch.ops.aten.copy_.default(arg146_1, getitem_3106); arg146_1 = getitem_3106 = copy__146 = None\n copy__147 = torch.ops.aten.copy_.default(arg147_1, getitem_3107); arg147_1 = getitem_3107 = copy__147 = None\n copy__148 = torch.ops.aten.copy_.default(arg148_1, getitem_1); arg148_1 = getitem_1 = copy__148 = None\n copy__149 = torch.ops.aten.copy_.default(arg149_1, getitem_445); arg149_1 = getitem_445 = copy__149 = None\n copy__150 = torch.ops.aten.copy_.default(arg150_1, getitem_889); arg150_1 = getitem_889 = copy__150 = None\n copy__151 = torch.ops.aten.copy_.default(arg299_1, getitem_444); arg299_1 = getitem_444 = copy__151 = None\n copy__152 = torch.ops.aten.copy_.default(arg300_1, getitem_446); arg300_1 = getitem_446 = copy__152 = None\n copy__153 = torch.ops.aten.copy_.default(arg301_1, getitem_447); arg301_1 = getitem_447 = copy__153 = None\n copy__154 = torch.ops.aten.copy_.default(arg302_1, getitem_448); arg302_1 = getitem_448 = copy__154 = None\n copy__155 = torch.ops.aten.copy_.default(arg303_1, getitem_449); arg303_1 = getitem_449 = copy__155 = None\n copy__156 = torch.ops.aten.copy_.default(arg304_1, getitem_450); arg304_1 = getitem_450 = copy__156 = None\n copy__157 = torch.ops.aten.copy_.default(arg305_1, getitem_451); arg305_1 = getitem_451 = copy__157 = None\n copy__158 = torch.ops.aten.copy_.default(arg306_1, getitem_452); arg306_1 = getitem_452 = copy__158 = None\n copy__159 = torch.ops.aten.copy_.default(arg307_1, getitem_453); arg307_1 = getitem_453 = copy__159 = None\n copy__160 = torch.ops.aten.copy_.default(arg308_1, getitem_454); arg308_1 = getitem_454 = copy__160 = None\n copy__161 = torch.ops.aten.copy_.default(arg309_1, getitem_455); arg309_1 = getitem_455 = copy__161 = None\n copy__162 = torch.ops.aten.copy_.default(arg310_1, getitem_456); arg310_1 = getitem_456 = copy__162 = None\n copy__163 = torch.ops.aten.copy_.default(arg311_1, getitem_457); arg311_1 = getitem_457 = copy__163 = None\n copy__164 = torch.ops.aten.copy_.default(arg312_1, getitem_458); arg312_1 = getitem_458 = copy__164 = None\n copy__165 = torch.ops.aten.copy_.default(arg313_1, getitem_459); arg313_1 = getitem_459 = copy__165 = None\n copy__166 = torch.ops.aten.copy_.default(arg314_1, getitem_460); arg314_1 = getitem_460 = copy__166 = None\n copy__167 = torch.ops.aten.copy_.default(arg315_1, 
getitem_461); arg315_1 = getitem_461 = copy__167 = None\n copy__168 = torch.ops.aten.copy_.default(arg316_1, getitem_462); arg316_1 = getitem_462 = copy__168 = None\n copy__169 = torch.ops.aten.copy_.default(arg317_1, getitem_463); arg317_1 = getitem_463 = copy__169 = None\n copy__170 = torch.ops.aten.copy_.default(arg318_1, getitem_464); arg318_1 = getitem_464 = copy__170 = None\n copy__171 = torch.ops.aten.copy_.default(arg319_1, getitem_465); arg319_1 = getitem_465 = copy__171 = None\n copy__172 = torch.ops.aten.copy_.default(arg320_1, getitem_466); arg320_1 = getitem_466 = copy__172 = None\n copy__173 = torch.ops.aten.copy_.default(arg321_1, getitem_467); arg321_1 = getitem_467 = copy__173 = None\n copy__174 = torch.ops.aten.copy_.default(arg322_1, getitem_468); arg322_1 = getitem_468 = copy__174 = None\n copy__175 = torch.ops.aten.copy_.default(arg323_1, getitem_469); arg323_1 = getitem_469 = copy__175 = None\n copy__176 = torch.ops.aten.copy_.default(arg324_1, getitem_470); arg324_1 = getitem_470 = copy__176 = None\n copy__177 = torch.ops.aten.copy_.default(arg325_1, getitem_471); arg325_1 = getitem_471 = copy__177 = None\n copy__178 = torch.ops.aten.copy_.default(arg326_1, getitem_472); arg326_1 = getitem_472 = copy__178 = None\n copy__179 = torch.ops.aten.copy_.default(arg327_1, getitem_473); arg327_1 = getitem_473 = copy__179 = None\n copy__180 = torch.ops.aten.copy_.default(arg328_1, getitem_474); arg328_1 = getitem_474 = copy__180 = None\n copy__181 = torch.ops.aten.copy_.default(arg329_1, getitem_475); arg329_1 = getitem_475 = copy__181 = None\n copy__182 = torch.ops.aten.copy_.default(arg330_1, getitem_476); arg330_1 = getitem_476 = copy__182 = None\n copy__183 = torch.ops.aten.copy_.default(arg331_1, getitem_477); arg331_1 = getitem_477 = copy__183 = None\n copy__184 = torch.ops.aten.copy_.default(arg332_1, getitem_478); arg332_1 = getitem_478 = copy__184 = None\n copy__185 = torch.ops.aten.copy_.default(arg333_1, getitem_479); arg333_1 = getitem_479 = copy__185 = None\n copy__186 = torch.ops.aten.copy_.default(arg334_1, getitem_480); arg334_1 = getitem_480 = copy__186 = None\n copy__187 = torch.ops.aten.copy_.default(arg335_1, getitem_481); arg335_1 = getitem_481 = copy__187 = None\n copy__188 = torch.ops.aten.copy_.default(arg336_1, getitem_482); arg336_1 = getitem_482 = copy__188 = None\n copy__189 = torch.ops.aten.copy_.default(arg337_1, getitem_483); arg337_1 = getitem_483 = copy__189 = None\n copy__190 = torch.ops.aten.copy_.default(arg338_1, getitem_484); arg338_1 = getitem_484 = copy__190 = None\n copy__191 = torch.ops.aten.copy_.default(arg339_1, getitem_485); arg339_1 = getitem_485 = copy__191 = None\n copy__192 = torch.ops.aten.copy_.default(arg340_1, getitem_486); arg340_1 = getitem_486 = copy__192 = None\n copy__193 = torch.ops.aten.copy_.default(arg341_1, getitem_487); arg341_1 = getitem_487 = copy__193 = None\n copy__194 = torch.ops.aten.copy_.default(arg342_1, getitem_488); arg342_1 = getitem_488 = copy__194 = None\n copy__195 = torch.ops.aten.copy_.default(arg343_1, getitem_489); arg343_1 = getitem_489 = copy__195 = None\n copy__196 = torch.ops.aten.copy_.default(arg344_1, getitem_490); arg344_1 = getitem_490 = copy__196 = None\n copy__197 = torch.ops.aten.copy_.default(arg345_1, getitem_491); arg345_1 = getitem_491 = copy__197 = None\n copy__198 = torch.ops.aten.copy_.default(arg346_1, getitem_492); arg346_1 = getitem_492 = copy__198 = None\n copy__199 = torch.ops.aten.copy_.default(arg347_1, getitem_493); arg347_1 = getitem_493 = copy__199 = None\n 
copy__200 = torch.ops.aten.copy_.default(arg348_1, getitem_494); arg348_1 = getitem_494 = copy__200 = None\n copy__201 = torch.ops.aten.copy_.default(arg349_1, getitem_495); arg349_1 = getitem_495 = copy__201 = None\n copy__202 = torch.ops.aten.copy_.default(arg350_1, getitem_496); arg350_1 = getitem_496 = copy__202 = None\n copy__203 = torch.ops.aten.copy_.default(arg351_1, getitem_497); arg351_1 = getitem_497 = copy__203 = None\n copy__204 = torch.ops.aten.copy_.default(arg352_1, getitem_498); arg352_1 = getitem_498 = copy__204 = None\n copy__205 = torch.ops.aten.copy_.default(arg353_1, getitem_499); arg353_1 = getitem_499 = copy__205 = None\n copy__206 = torch.ops.aten.copy_.default(arg354_1, getitem_500); arg354_1 = getitem_500 = copy__206 = None\n copy__207 = torch.ops.aten.copy_.default(arg355_1, getitem_501); arg355_1 = getitem_501 = copy__207 = None\n copy__208 = torch.ops.aten.copy_.default(arg356_1, getitem_502); arg356_1 = getitem_502 = copy__208 = None\n copy__209 = torch.ops.aten.copy_.default(arg357_1, getitem_503); arg357_1 = getitem_503 = copy__209 = None\n copy__210 = torch.ops.aten.copy_.default(arg358_1, getitem_504); arg358_1 = getitem_504 = copy__210 = None\n copy__211 = torch.ops.aten.copy_.default(arg359_1, getitem_505); arg359_1 = getitem_505 = copy__211 = None\n copy__212 = torch.ops.aten.copy_.default(arg360_1, getitem_506); arg360_1 = getitem_506 = copy__212 = None\n copy__213 = torch.ops.aten.copy_.default(arg361_1, getitem_507); arg361_1 = getitem_507 = copy__213 = None\n copy__214 = torch.ops.aten.copy_.default(arg362_1, getitem_508); arg362_1 = getitem_508 = copy__214 = None\n copy__215 = torch.ops.aten.copy_.default(arg363_1, getitem_509); arg363_1 = getitem_509 = copy__215 = None\n copy__216 = torch.ops.aten.copy_.default(arg364_1, getitem_510); arg364_1 = getitem_510 = copy__216 = None\n copy__217 = torch.ops.aten.copy_.default(arg365_1, getitem_511); arg365_1 = getitem_511 = copy__217 = None\n copy__218 = torch.ops.aten.copy_.default(arg366_1, getitem_512); arg366_1 = getitem_512 = copy__218 = None\n copy__219 = torch.ops.aten.copy_.default(arg367_1, getitem_513); arg367_1 = getitem_513 = copy__219 = None\n copy__220 = torch.ops.aten.copy_.default(arg368_1, getitem_514); arg368_1 = getitem_514 = copy__220 = None\n copy__221 = torch.ops.aten.copy_.default(arg369_1, getitem_515); arg369_1 = getitem_515 = copy__221 = None\n copy__222 = torch.ops.aten.copy_.default(arg370_1, getitem_516); arg370_1 = getitem_516 = copy__222 = None\n copy__223 = torch.ops.aten.copy_.default(arg371_1, getitem_517); arg371_1 = getitem_517 = copy__223 = None\n copy__224 = torch.ops.aten.copy_.default(arg372_1, getitem_518); arg372_1 = getitem_518 = copy__224 = None\n copy__225 = torch.ops.aten.copy_.default(arg373_1, getitem_519); arg373_1 = getitem_519 = copy__225 = None\n copy__226 = torch.ops.aten.copy_.default(arg374_1, getitem_520); arg374_1 = getitem_520 = copy__226 = None\n copy__227 = torch.ops.aten.copy_.default(arg375_1, getitem_521); arg375_1 = getitem_521 = copy__227 = None\n copy__228 = torch.ops.aten.copy_.default(arg376_1, getitem_522); arg376_1 = getitem_522 = copy__228 = None\n copy__229 = torch.ops.aten.copy_.default(arg377_1, getitem_523); arg377_1 = getitem_523 = copy__229 = None\n copy__230 = torch.ops.aten.copy_.default(arg378_1, getitem_524); arg378_1 = getitem_524 = copy__230 = None\n copy__231 = torch.ops.aten.copy_.default(arg379_1, getitem_525); arg379_1 = getitem_525 = copy__231 = None\n copy__232 = torch.ops.aten.copy_.default(arg380_1, getitem_526); 
arg380_1 = getitem_526 = copy__232 = None\n copy__233 = torch.ops.aten.copy_.default(arg381_1, getitem_527); arg381_1 = getitem_527 = copy__233 = None\n copy__234 = torch.ops.aten.copy_.default(arg382_1, getitem_528); arg382_1 = getitem_528 = copy__234 = None\n copy__235 = torch.ops.aten.copy_.default(arg383_1, getitem_529); arg383_1 = getitem_529 = copy__235 = None\n copy__236 = torch.ops.aten.copy_.default(arg384_1, getitem_530); arg384_1 = getitem_530 = copy__236 = None\n copy__237 = torch.ops.aten.copy_.default(arg385_1, getitem_531); arg385_1 = getitem_531 = copy__237 = None\n copy__238 = torch.ops.aten.copy_.default(arg386_1, getitem_532); arg386_1 = getitem_532 = copy__238 = None\n copy__239 = torch.ops.aten.copy_.default(arg387_1, getitem_533); arg387_1 = getitem_533 = copy__239 = None\n copy__240 = torch.ops.aten.copy_.default(arg388_1, getitem_534); arg388_1 = getitem_534 = copy__240 = None\n copy__241 = torch.ops.aten.copy_.default(arg389_1, getitem_535); arg389_1 = getitem_535 = copy__241 = None\n copy__242 = torch.ops.aten.copy_.default(arg390_1, getitem_536); arg390_1 = getitem_536 = copy__242 = None\n copy__243 = torch.ops.aten.copy_.default(arg391_1, getitem_537); arg391_1 = getitem_537 = copy__243 = None\n copy__244 = torch.ops.aten.copy_.default(arg392_1, getitem_538); arg392_1 = getitem_538 = copy__244 = None\n copy__245 = torch.ops.aten.copy_.default(arg393_1, getitem_539); arg393_1 = getitem_539 = copy__245 = None\n copy__246 = torch.ops.aten.copy_.default(arg394_1, getitem_540); arg394_1 = getitem_540 = copy__246 = None\n copy__247 = torch.ops.aten.copy_.default(arg395_1, getitem_541); arg395_1 = getitem_541 = copy__247 = None\n copy__248 = torch.ops.aten.copy_.default(arg396_1, getitem_542); arg396_1 = getitem_542 = copy__248 = None\n copy__249 = torch.ops.aten.copy_.default(arg397_1, getitem_543); arg397_1 = getitem_543 = copy__249 = None\n copy__250 = torch.ops.aten.copy_.default(arg398_1, getitem_544); arg398_1 = getitem_544 = copy__250 = None\n copy__251 = torch.ops.aten.copy_.default(arg399_1, getitem_545); arg399_1 = getitem_545 = copy__251 = None\n copy__252 = torch.ops.aten.copy_.default(arg400_1, getitem_546); arg400_1 = getitem_546 = copy__252 = None\n copy__253 = torch.ops.aten.copy_.default(arg401_1, getitem_547); arg401_1 = getitem_547 = copy__253 = None\n copy__254 = torch.ops.aten.copy_.default(arg402_1, getitem_548); arg402_1 = getitem_548 = copy__254 = None\n copy__255 = torch.ops.aten.copy_.default(arg403_1, getitem_549); arg403_1 = getitem_549 = copy__255 = None\n copy__256 = torch.ops.aten.copy_.default(arg404_1, getitem_550); arg404_1 = getitem_550 = copy__256 = None\n copy__257 = torch.ops.aten.copy_.default(arg405_1, getitem_551); arg405_1 = getitem_551 = copy__257 = None\n copy__258 = torch.ops.aten.copy_.default(arg406_1, getitem_552); arg406_1 = getitem_552 = copy__258 = None\n copy__259 = torch.ops.aten.copy_.default(arg407_1, getitem_553); arg407_1 = getitem_553 = copy__259 = None\n copy__260 = torch.ops.aten.copy_.default(arg408_1, getitem_554); arg408_1 = getitem_554 = copy__260 = None\n copy__261 = torch.ops.aten.copy_.default(arg409_1, getitem_555); arg409_1 = getitem_555 = copy__261 = None\n copy__262 = torch.ops.aten.copy_.default(arg410_1, getitem_556); arg410_1 = getitem_556 = copy__262 = None\n copy__263 = torch.ops.aten.copy_.default(arg411_1, getitem_557); arg411_1 = getitem_557 = copy__263 = None\n copy__264 = torch.ops.aten.copy_.default(arg412_1, getitem_558); arg412_1 = getitem_558 = copy__264 = None\n copy__265 = 
torch.ops.aten.copy_.default(arg413_1, getitem_559); arg413_1 = getitem_559 = copy__265 = None\n copy__266 = torch.ops.aten.copy_.default(arg414_1, getitem_560); arg414_1 = getitem_560 = copy__266 = None\n copy__267 = torch.ops.aten.copy_.default(arg415_1, getitem_561); arg415_1 = getitem_561 = copy__267 = None\n copy__268 = torch.ops.aten.copy_.default(arg416_1, getitem_562); arg416_1 = getitem_562 = copy__268 = None\n copy__269 = torch.ops.aten.copy_.default(arg417_1, getitem_563); arg417_1 = getitem_563 = copy__269 = None\n copy__270 = torch.ops.aten.copy_.default(arg418_1, getitem_564); arg418_1 = getitem_564 = copy__270 = None\n copy__271 = torch.ops.aten.copy_.default(arg419_1, getitem_565); arg419_1 = getitem_565 = copy__271 = None\n copy__272 = torch.ops.aten.copy_.default(arg420_1, getitem_566); arg420_1 = getitem_566 = copy__272 = None\n copy__273 = torch.ops.aten.copy_.default(arg421_1, getitem_567); arg421_1 = getitem_567 = copy__273 = None\n copy__274 = torch.ops.aten.copy_.default(arg422_1, getitem_568); arg422_1 = getitem_568 = copy__274 = None\n copy__275 = torch.ops.aten.copy_.default(arg423_1, getitem_569); arg423_1 = getitem_569 = copy__275 = None\n copy__276 = torch.ops.aten.copy_.default(arg424_1, getitem_570); arg424_1 = getitem_570 = copy__276 = None\n copy__277 = torch.ops.aten.copy_.default(arg425_1, getitem_571); arg425_1 = getitem_571 = copy__277 = None\n copy__278 = torch.ops.aten.copy_.default(arg426_1, getitem_572); arg426_1 = getitem_572 = copy__278 = None\n copy__279 = torch.ops.aten.copy_.default(arg427_1, getitem_573); arg427_1 = getitem_573 = copy__279 = None\n copy__280 = torch.ops.aten.copy_.default(arg428_1, getitem_574); arg428_1 = getitem_574 = copy__280 = None\n copy__281 = torch.ops.aten.copy_.default(arg429_1, getitem_575); arg429_1 = getitem_575 = copy__281 = None\n copy__282 = torch.ops.aten.copy_.default(arg430_1, getitem_576); arg430_1 = getitem_576 = copy__282 = None\n copy__283 = torch.ops.aten.copy_.default(arg431_1, getitem_577); arg431_1 = getitem_577 = copy__283 = None\n copy__284 = torch.ops.aten.copy_.default(arg432_1, getitem_578); arg432_1 = getitem_578 = copy__284 = None\n copy__285 = torch.ops.aten.copy_.default(arg433_1, getitem_579); arg433_1 = getitem_579 = copy__285 = None\n copy__286 = torch.ops.aten.copy_.default(arg434_1, getitem_580); arg434_1 = getitem_580 = copy__286 = None\n copy__287 = torch.ops.aten.copy_.default(arg435_1, getitem_581); arg435_1 = getitem_581 = copy__287 = None\n copy__288 = torch.ops.aten.copy_.default(arg436_1, getitem_582); arg436_1 = getitem_582 = copy__288 = None\n copy__289 = torch.ops.aten.copy_.default(arg437_1, getitem_583); arg437_1 = getitem_583 = copy__289 = None\n copy__290 = torch.ops.aten.copy_.default(arg438_1, getitem_584); arg438_1 = getitem_584 = copy__290 = None\n copy__291 = torch.ops.aten.copy_.default(arg439_1, getitem_585); arg439_1 = getitem_585 = copy__291 = None\n copy__292 = torch.ops.aten.copy_.default(arg440_1, getitem_586); arg440_1 = getitem_586 = copy__292 = None\n copy__293 = torch.ops.aten.copy_.default(arg441_1, getitem_587); arg441_1 = getitem_587 = copy__293 = None\n copy__294 = torch.ops.aten.copy_.default(arg442_1, getitem_588); arg442_1 = getitem_588 = copy__294 = None\n copy__295 = torch.ops.aten.copy_.default(arg443_1, getitem_589); arg443_1 = getitem_589 = copy__295 = None\n copy__296 = torch.ops.aten.copy_.default(arg444_1, getitem_590); arg444_1 = getitem_590 = copy__296 = None\n copy__297 = torch.ops.aten.copy_.default(arg445_1, getitem_591); arg445_1 = 
getitem_591 = copy__297 = None\n copy__298 = torch.ops.aten.copy_.default(arg446_1, getitem_888); arg446_1 = getitem_888 = copy__298 = None\n copy__299 = torch.ops.aten.copy_.default(arg447_1, getitem_890); arg447_1 = getitem_890 = copy__299 = None\n copy__300 = torch.ops.aten.copy_.default(arg448_1, getitem_891); arg448_1 = getitem_891 = copy__300 = None\n copy__301 = torch.ops.aten.copy_.default(arg449_1, getitem_892); arg449_1 = getitem_892 = copy__301 = None\n copy__302 = torch.ops.aten.copy_.default(arg450_1, getitem_893); arg450_1 = getitem_893 = copy__302 = None\n copy__303 = torch.ops.aten.copy_.default(arg451_1, getitem_894); arg451_1 = getitem_894 = copy__303 = None\n copy__304 = torch.ops.aten.copy_.default(arg452_1, getitem_895); arg452_1 = getitem_895 = copy__304 = None\n copy__305 = torch.ops.aten.copy_.default(arg453_1, getitem_896); arg453_1 = getitem_896 = copy__305 = None\n copy__306 = torch.ops.aten.copy_.default(arg454_1, getitem_897); arg454_1 = getitem_897 = copy__306 = None\n copy__307 = torch.ops.aten.copy_.default(arg455_1, getitem_898); arg455_1 = getitem_898 = copy__307 = None\n copy__308 = torch.ops.aten.copy_.default(arg456_1, getitem_899); arg456_1 = getitem_899 = copy__308 = None\n copy__309 = torch.ops.aten.copy_.default(arg457_1, getitem_900); arg457_1 = getitem_900 = copy__309 = None\n copy__310 = torch.ops.aten.copy_.default(arg458_1, getitem_901); arg458_1 = getitem_901 = copy__310 = None\n copy__311 = torch.ops.aten.copy_.default(arg459_1, getitem_902); arg459_1 = getitem_902 = copy__311 = None\n copy__312 = torch.ops.aten.copy_.default(arg460_1, getitem_903); arg460_1 = getitem_903 = copy__312 = None\n copy__313 = torch.ops.aten.copy_.default(arg461_1, getitem_904); arg461_1 = getitem_904 = copy__313 = None\n copy__314 = torch.ops.aten.copy_.default(arg462_1, getitem_905); arg462_1 = getitem_905 = copy__314 = None\n copy__315 = torch.ops.aten.copy_.default(arg463_1, getitem_906); arg463_1 = getitem_906 = copy__315 = None\n copy__316 = torch.ops.aten.copy_.default(arg464_1, getitem_907); arg464_1 = getitem_907 = copy__316 = None\n copy__317 = torch.ops.aten.copy_.default(arg465_1, getitem_908); arg465_1 = getitem_908 = copy__317 = None\n copy__318 = torch.ops.aten.copy_.default(arg466_1, getitem_909); arg466_1 = getitem_909 = copy__318 = None\n copy__319 = torch.ops.aten.copy_.default(arg467_1, getitem_910); arg467_1 = getitem_910 = copy__319 = None\n copy__320 = torch.ops.aten.copy_.default(arg468_1, getitem_911); arg468_1 = getitem_911 = copy__320 = None\n copy__321 = torch.ops.aten.copy_.default(arg469_1, getitem_912); arg469_1 = getitem_912 = copy__321 = None\n copy__322 = torch.ops.aten.copy_.default(arg470_1, getitem_913); arg470_1 = getitem_913 = copy__322 = None\n copy__323 = torch.ops.aten.copy_.default(arg471_1, getitem_914); arg471_1 = getitem_914 = copy__323 = None\n copy__324 = torch.ops.aten.copy_.default(arg472_1, getitem_915); arg472_1 = getitem_915 = copy__324 = None\n copy__325 = torch.ops.aten.copy_.default(arg473_1, getitem_916); arg473_1 = getitem_916 = copy__325 = None\n copy__326 = torch.ops.aten.copy_.default(arg474_1, getitem_917); arg474_1 = getitem_917 = copy__326 = None\n copy__327 = torch.ops.aten.copy_.default(arg475_1, getitem_918); arg475_1 = getitem_918 = copy__327 = None\n copy__328 = torch.ops.aten.copy_.default(arg476_1, getitem_919); arg476_1 = getitem_919 = copy__328 = None\n copy__329 = torch.ops.aten.copy_.default(arg477_1, getitem_920); arg477_1 = getitem_920 = copy__329 = None\n copy__330 = 
torch.ops.aten.copy_.default(arg478_1, getitem_921); arg478_1 = getitem_921 = copy__330 = None\n copy__331 = torch.ops.aten.copy_.default(arg479_1, getitem_922); arg479_1 = getitem_922 = copy__331 = None\n copy__332 = torch.ops.aten.copy_.default(arg480_1, getitem_923); arg480_1 = getitem_923 = copy__332 = None\n copy__333 = torch.ops.aten.copy_.default(arg481_1, getitem_924); arg481_1 = getitem_924 = copy__333 = None\n copy__334 = torch.ops.aten.copy_.default(arg482_1, getitem_925); arg482_1 = getitem_925 = copy__334 = None\n copy__335 = torch.ops.aten.copy_.default(arg483_1, getitem_926); arg483_1 = getitem_926 = copy__335 = None\n copy__336 = torch.ops.aten.copy_.default(arg484_1, getitem_927); arg484_1 = getitem_927 = copy__336 = None\n copy__337 = torch.ops.aten.copy_.default(arg485_1, getitem_928); arg485_1 = getitem_928 = copy__337 = None\n copy__338 = torch.ops.aten.copy_.default(arg486_1, getitem_929); arg486_1 = getitem_929 = copy__338 = None\n copy__339 = torch.ops.aten.copy_.default(arg487_1, getitem_930); arg487_1 = getitem_930 = copy__339 = None\n copy__340 = torch.ops.aten.copy_.default(arg488_1, getitem_931); arg488_1 = getitem_931 = copy__340 = None\n copy__341 = torch.ops.aten.copy_.default(arg489_1, getitem_932); arg489_1 = getitem_932 = copy__341 = None\n copy__342 = torch.ops.aten.copy_.default(arg490_1, getitem_933); arg490_1 = getitem_933 = copy__342 = None\n copy__343 = torch.ops.aten.copy_.default(arg491_1, getitem_934); arg491_1 = getitem_934 = copy__343 = None\n copy__344 = torch.ops.aten.copy_.default(arg492_1, getitem_935); arg492_1 = getitem_935 = copy__344 = None\n copy__345 = torch.ops.aten.copy_.default(arg493_1, getitem_936); arg493_1 = getitem_936 = copy__345 = None\n copy__346 = torch.ops.aten.copy_.default(arg494_1, getitem_937); arg494_1 = getitem_937 = copy__346 = None\n copy__347 = torch.ops.aten.copy_.default(arg495_1, getitem_938); arg495_1 = getitem_938 = copy__347 = None\n copy__348 = torch.ops.aten.copy_.default(arg496_1, getitem_939); arg496_1 = getitem_939 = copy__348 = None\n copy__349 = torch.ops.aten.copy_.default(arg497_1, getitem_940); arg497_1 = getitem_940 = copy__349 = None\n copy__350 = torch.ops.aten.copy_.default(arg498_1, getitem_941); arg498_1 = getitem_941 = copy__350 = None\n copy__351 = torch.ops.aten.copy_.default(arg499_1, getitem_942); arg499_1 = getitem_942 = copy__351 = None\n copy__352 = torch.ops.aten.copy_.default(arg500_1, getitem_943); arg500_1 = getitem_943 = copy__352 = None\n copy__353 = torch.ops.aten.copy_.default(arg501_1, getitem_944); arg501_1 = getitem_944 = copy__353 = None\n copy__354 = torch.ops.aten.copy_.default(arg502_1, getitem_945); arg502_1 = getitem_945 = copy__354 = None\n copy__355 = torch.ops.aten.copy_.default(arg503_1, getitem_946); arg503_1 = getitem_946 = copy__355 = None\n copy__356 = torch.ops.aten.copy_.default(arg504_1, getitem_947); arg504_1 = getitem_947 = copy__356 = None\n copy__357 = torch.ops.aten.copy_.default(arg505_1, getitem_948); arg505_1 = getitem_948 = copy__357 = None\n copy__358 = torch.ops.aten.copy_.default(arg506_1, getitem_949); arg506_1 = getitem_949 = copy__358 = None\n copy__359 = torch.ops.aten.copy_.default(arg507_1, getitem_950); arg507_1 = getitem_950 = copy__359 = None\n copy__360 = torch.ops.aten.copy_.default(arg508_1, getitem_951); arg508_1 = getitem_951 = copy__360 = None\n copy__361 = torch.ops.aten.copy_.default(arg509_1, getitem_952); arg509_1 = getitem_952 = copy__361 = None\n copy__362 = torch.ops.aten.copy_.default(arg510_1, getitem_953); arg510_1 = 
getitem_953 = copy__362 = None\n copy__363 = torch.ops.aten.copy_.default(arg511_1, getitem_954); arg511_1 = getitem_954 = copy__363 = None\n copy__364 = torch.ops.aten.copy_.default(arg512_1, getitem_955); arg512_1 = getitem_955 = copy__364 = None\n copy__365 = torch.ops.aten.copy_.default(arg513_1, getitem_956); arg513_1 = getitem_956 = copy__365 = None\n copy__366 = torch.ops.aten.copy_.default(arg514_1, getitem_957); arg514_1 = getitem_957 = copy__366 = None\n copy__367 = torch.ops.aten.copy_.default(arg515_1, getitem_958); arg515_1 = getitem_958 = copy__367 = None\n copy__368 = torch.ops.aten.copy_.default(arg516_1, getitem_959); arg516_1 = getitem_959 = copy__368 = None\n copy__369 = torch.ops.aten.copy_.default(arg517_1, getitem_960); arg517_1 = getitem_960 = copy__369 = None\n copy__370 = torch.ops.aten.copy_.default(arg518_1, getitem_961); arg518_1 = getitem_961 = copy__370 = None\n copy__371 = torch.ops.aten.copy_.default(arg519_1, getitem_962); arg519_1 = getitem_962 = copy__371 = None\n copy__372 = torch.ops.aten.copy_.default(arg520_1, getitem_963); arg520_1 = getitem_963 = copy__372 = None\n copy__373 = torch.ops.aten.copy_.default(arg521_1, getitem_964); arg521_1 = getitem_964 = copy__373 = None\n copy__374 = torch.ops.aten.copy_.default(arg522_1, getitem_965); arg522_1 = getitem_965 = copy__374 = None\n copy__375 = torch.ops.aten.copy_.default(arg523_1, getitem_966); arg523_1 = getitem_966 = copy__375 = None\n copy__376 = torch.ops.aten.copy_.default(arg524_1, getitem_967); arg524_1 = getitem_967 = copy__376 = None\n copy__377 = torch.ops.aten.copy_.default(arg525_1, getitem_968); arg525_1 = getitem_968 = copy__377 = None\n copy__378 = torch.ops.aten.copy_.default(arg526_1, getitem_969); arg526_1 = getitem_969 = copy__378 = None\n copy__379 = torch.ops.aten.copy_.default(arg527_1, getitem_970); arg527_1 = getitem_970 = copy__379 = None\n copy__380 = torch.ops.aten.copy_.default(arg528_1, getitem_971); arg528_1 = getitem_971 = copy__380 = None\n copy__381 = torch.ops.aten.copy_.default(arg529_1, getitem_972); arg529_1 = getitem_972 = copy__381 = None\n copy__382 = torch.ops.aten.copy_.default(arg530_1, getitem_973); arg530_1 = getitem_973 = copy__382 = None\n copy__383 = torch.ops.aten.copy_.default(arg531_1, getitem_974); arg531_1 = getitem_974 = copy__383 = None\n copy__384 = torch.ops.aten.copy_.default(arg532_1, getitem_975); arg532_1 = getitem_975 = copy__384 = None\n copy__385 = torch.ops.aten.copy_.default(arg533_1, getitem_976); arg533_1 = getitem_976 = copy__385 = None\n copy__386 = torch.ops.aten.copy_.default(arg534_1, getitem_977); arg534_1 = getitem_977 = copy__386 = None\n copy__387 = torch.ops.aten.copy_.default(arg535_1, getitem_978); arg535_1 = getitem_978 = copy__387 = None\n copy__388 = torch.ops.aten.copy_.default(arg536_1, getitem_979); arg536_1 = getitem_979 = copy__388 = None\n copy__389 = torch.ops.aten.copy_.default(arg537_1, getitem_980); arg537_1 = getitem_980 = copy__389 = None\n copy__390 = torch.ops.aten.copy_.default(arg538_1, getitem_981); arg538_1 = getitem_981 = copy__390 = None\n copy__391 = torch.ops.aten.copy_.default(arg539_1, getitem_982); arg539_1 = getitem_982 = copy__391 = None\n copy__392 = torch.ops.aten.copy_.default(arg540_1, getitem_983); arg540_1 = getitem_983 = copy__392 = None\n copy__393 = torch.ops.aten.copy_.default(arg541_1, getitem_984); arg541_1 = getitem_984 = copy__393 = None\n copy__394 = torch.ops.aten.copy_.default(arg542_1, getitem_985); arg542_1 = getitem_985 = copy__394 = None\n copy__395 = 
torch.ops.aten.copy_.default(arg543_1, getitem_986); arg543_1 = getitem_986 = copy__395 = None\n copy__396 = torch.ops.aten.copy_.default(arg544_1, getitem_987); arg544_1 = getitem_987 = copy__396 = None\n copy__397 = torch.ops.aten.copy_.default(arg545_1, getitem_988); arg545_1 = getitem_988 = copy__397 = None\n copy__398 = torch.ops.aten.copy_.default(arg546_1, getitem_989); arg546_1 = getitem_989 = copy__398 = None\n copy__399 = torch.ops.aten.copy_.default(arg547_1, getitem_990); arg547_1 = getitem_990 = copy__399 = None\n copy__400 = torch.ops.aten.copy_.default(arg548_1, getitem_991); arg548_1 = getitem_991 = copy__400 = None\n copy__401 = torch.ops.aten.copy_.default(arg549_1, getitem_992); arg549_1 = getitem_992 = copy__401 = None\n copy__402 = torch.ops.aten.copy_.default(arg550_1, getitem_993); arg550_1 = getitem_993 = copy__402 = None\n copy__403 = torch.ops.aten.copy_.default(arg551_1, getitem_994); arg551_1 = getitem_994 = copy__403 = None\n copy__404 = torch.ops.aten.copy_.default(arg552_1, getitem_995); arg552_1 = getitem_995 = copy__404 = None\n copy__405 = torch.ops.aten.copy_.default(arg553_1, getitem_996); arg553_1 = getitem_996 = copy__405 = None\n copy__406 = torch.ops.aten.copy_.default(arg554_1, getitem_997); arg554_1 = getitem_997 = copy__406 = None\n copy__407 = torch.ops.aten.copy_.default(arg555_1, getitem_998); arg555_1 = getitem_998 = copy__407 = None\n copy__408 = torch.ops.aten.copy_.default(arg556_1, getitem_999); arg556_1 = getitem_999 = copy__408 = None\n copy__409 = torch.ops.aten.copy_.default(arg557_1, getitem_1000); arg557_1 = getitem_1000 = copy__409 = None\n copy__410 = torch.ops.aten.copy_.default(arg558_1, getitem_1001); arg558_1 = getitem_1001 = copy__410 = None\n copy__411 = torch.ops.aten.copy_.default(arg559_1, getitem_1002); arg559_1 = getitem_1002 = copy__411 = None\n copy__412 = torch.ops.aten.copy_.default(arg560_1, getitem_1003); arg560_1 = getitem_1003 = copy__412 = None\n copy__413 = torch.ops.aten.copy_.default(arg561_1, getitem_1004); arg561_1 = getitem_1004 = copy__413 = None\n copy__414 = torch.ops.aten.copy_.default(arg562_1, getitem_1005); arg562_1 = getitem_1005 = copy__414 = None\n copy__415 = torch.ops.aten.copy_.default(arg563_1, getitem_1006); arg563_1 = getitem_1006 = copy__415 = None\n copy__416 = torch.ops.aten.copy_.default(arg564_1, getitem_1007); arg564_1 = getitem_1007 = copy__416 = None\n copy__417 = torch.ops.aten.copy_.default(arg565_1, getitem_1008); arg565_1 = getitem_1008 = copy__417 = None\n copy__418 = torch.ops.aten.copy_.default(arg566_1, getitem_1009); arg566_1 = getitem_1009 = copy__418 = None\n copy__419 = torch.ops.aten.copy_.default(arg567_1, getitem_1010); arg567_1 = getitem_1010 = copy__419 = None\n copy__420 = torch.ops.aten.copy_.default(arg568_1, getitem_1011); arg568_1 = getitem_1011 = copy__420 = None\n copy__421 = torch.ops.aten.copy_.default(arg569_1, getitem_1012); arg569_1 = getitem_1012 = copy__421 = None\n copy__422 = torch.ops.aten.copy_.default(arg570_1, getitem_1013); arg570_1 = getitem_1013 = copy__422 = None\n copy__423 = torch.ops.aten.copy_.default(arg571_1, getitem_1014); arg571_1 = getitem_1014 = copy__423 = None\n copy__424 = torch.ops.aten.copy_.default(arg572_1, getitem_1015); arg572_1 = getitem_1015 = copy__424 = None\n copy__425 = torch.ops.aten.copy_.default(arg573_1, getitem_1016); arg573_1 = getitem_1016 = copy__425 = None\n copy__426 = torch.ops.aten.copy_.default(arg574_1, getitem_1017); arg574_1 = getitem_1017 = copy__426 = None\n copy__427 = 
torch.ops.aten.copy_.default(arg575_1, getitem_1018); arg575_1 = getitem_1018 = copy__427 = None\n copy__428 = torch.ops.aten.copy_.default(arg576_1, getitem_1019); arg576_1 = getitem_1019 = copy__428 = None\n copy__429 = torch.ops.aten.copy_.default(arg577_1, getitem_1020); arg577_1 = getitem_1020 = copy__429 = None\n copy__430 = torch.ops.aten.copy_.default(arg578_1, getitem_1021); arg578_1 = getitem_1021 = copy__430 = None\n copy__431 = torch.ops.aten.copy_.default(arg579_1, getitem_1022); arg579_1 = getitem_1022 = copy__431 = None\n copy__432 = torch.ops.aten.copy_.default(arg580_1, getitem_1023); arg580_1 = getitem_1023 = copy__432 = None\n copy__433 = torch.ops.aten.copy_.default(arg581_1, getitem_1024); arg581_1 = getitem_1024 = copy__433 = None\n copy__434 = torch.ops.aten.copy_.default(arg582_1, getitem_1025); arg582_1 = getitem_1025 = copy__434 = None\n copy__435 = torch.ops.aten.copy_.default(arg583_1, getitem_1026); arg583_1 = getitem_1026 = copy__435 = None\n copy__436 = torch.ops.aten.copy_.default(arg584_1, getitem_1027); arg584_1 = getitem_1027 = copy__436 = None\n copy__437 = torch.ops.aten.copy_.default(arg585_1, getitem_1028); arg585_1 = getitem_1028 = copy__437 = None\n copy__438 = torch.ops.aten.copy_.default(arg586_1, getitem_1029); arg586_1 = getitem_1029 = copy__438 = None\n copy__439 = torch.ops.aten.copy_.default(arg587_1, getitem_1030); arg587_1 = getitem_1030 = copy__439 = None\n copy__440 = torch.ops.aten.copy_.default(arg588_1, getitem_1031); arg588_1 = getitem_1031 = copy__440 = None\n copy__441 = torch.ops.aten.copy_.default(arg589_1, getitem_1032); arg589_1 = getitem_1032 = copy__441 = None\n copy__442 = torch.ops.aten.copy_.default(arg590_1, getitem_1033); arg590_1 = getitem_1033 = copy__442 = None\n copy__443 = torch.ops.aten.copy_.default(arg591_1, getitem_1034); arg591_1 = getitem_1034 = copy__443 = None\n copy__444 = torch.ops.aten.copy_.default(arg592_1, getitem_1035); arg592_1 = getitem_1035 = copy__444 = None\n copy__445 = torch.ops.aten.copy_.default(arg593_1, getitem); arg593_1 = getitem = copy__445 = None\n copy__446 = torch.ops.aten.copy_.default(arg594_1, getitem_2); arg594_1 = getitem_2 = copy__446 = None\n copy__447 = torch.ops.aten.copy_.default(arg595_1, getitem_3); arg595_1 = getitem_3 = copy__447 = None\n copy__448 = torch.ops.aten.copy_.default(arg596_1, getitem_4); arg596_1 = getitem_4 = copy__448 = None\n copy__449 = torch.ops.aten.copy_.default(arg597_1, getitem_5); arg597_1 = getitem_5 = copy__449 = None\n copy__450 = torch.ops.aten.copy_.default(arg598_1, getitem_6); arg598_1 = getitem_6 = copy__450 = None\n copy__451 = torch.ops.aten.copy_.default(arg599_1, getitem_7); arg599_1 = getitem_7 = copy__451 = None\n copy__452 = torch.ops.aten.copy_.default(arg600_1, getitem_8); arg600_1 = getitem_8 = copy__452 = None\n copy__453 = torch.ops.aten.copy_.default(arg601_1, getitem_9); arg601_1 = getitem_9 = copy__453 = None\n copy__454 = torch.ops.aten.copy_.default(arg602_1, getitem_10); arg602_1 = getitem_10 = copy__454 = None\n copy__455 = torch.ops.aten.copy_.default(arg603_1, getitem_11); arg603_1 = getitem_11 = copy__455 = None\n copy__456 = torch.ops.aten.copy_.default(arg604_1, getitem_12); arg604_1 = getitem_12 = copy__456 = None\n copy__457 = torch.ops.aten.copy_.default(arg605_1, getitem_13); arg605_1 = getitem_13 = copy__457 = None\n copy__458 = torch.ops.aten.copy_.default(arg606_1, getitem_14); arg606_1 = getitem_14 = copy__458 = None\n copy__459 = torch.ops.aten.copy_.default(arg607_1, getitem_15); arg607_1 = getitem_15 = 
copy__459 = None\n copy__460 = torch.ops.aten.copy_.default(arg608_1, getitem_16); arg608_1 = getitem_16 = copy__460 = None\n copy__461 = torch.ops.aten.copy_.default(arg609_1, getitem_17); arg609_1 = getitem_17 = copy__461 = None\n copy__462 = torch.ops.aten.copy_.default(arg610_1, getitem_18); arg610_1 = getitem_18 = copy__462 = None\n copy__463 = torch.ops.aten.copy_.default(arg611_1, getitem_19); arg611_1 = getitem_19 = copy__463 = None\n copy__464 = torch.ops.aten.copy_.default(arg612_1, getitem_20); arg612_1 = getitem_20 = copy__464 = None\n copy__465 = torch.ops.aten.copy_.default(arg613_1, getitem_21); arg613_1 = getitem_21 = copy__465 = None\n copy__466 = torch.ops.aten.copy_.default(arg614_1, getitem_22); arg614_1 = getitem_22 = copy__466 = None\n copy__467 = torch.ops.aten.copy_.default(arg615_1, getitem_23); arg615_1 = getitem_23 = copy__467 = None\n copy__468 = torch.ops.aten.copy_.default(arg616_1, getitem_24); arg616_1 = getitem_24 = copy__468 = None\n copy__469 = torch.ops.aten.copy_.default(arg617_1, getitem_25); arg617_1 = getitem_25 = copy__469 = None\n copy__470 = torch.ops.aten.copy_.default(arg618_1, getitem_26); arg618_1 = getitem_26 = copy__470 = None\n copy__471 = torch.ops.aten.copy_.default(arg619_1, getitem_27); arg619_1 = getitem_27 = copy__471 = None\n copy__472 = torch.ops.aten.copy_.default(arg620_1, getitem_28); arg620_1 = getitem_28 = copy__472 = None\n copy__473 = torch.ops.aten.copy_.default(arg621_1, getitem_29); arg621_1 = getitem_29 = copy__473 = None\n copy__474 = torch.ops.aten.copy_.default(arg622_1, getitem_30); arg622_1 = getitem_30 = copy__474 = None\n copy__475 = torch.ops.aten.copy_.default(arg623_1, getitem_31); arg623_1 = getitem_31 = copy__475 = None\n copy__476 = torch.ops.aten.copy_.default(arg624_1, getitem_32); arg624_1 = getitem_32 = copy__476 = None\n copy__477 = torch.ops.aten.copy_.default(arg625_1, getitem_33); arg625_1 = getitem_33 = copy__477 = None\n copy__478 = torch.ops.aten.copy_.default(arg626_1, getitem_34); arg626_1 = getitem_34 = copy__478 = None\n copy__479 = torch.ops.aten.copy_.default(arg627_1, getitem_35); arg627_1 = getitem_35 = copy__479 = None\n copy__480 = torch.ops.aten.copy_.default(arg628_1, getitem_36); arg628_1 = getitem_36 = copy__480 = None\n copy__481 = torch.ops.aten.copy_.default(arg629_1, getitem_37); arg629_1 = getitem_37 = copy__481 = None\n copy__482 = torch.ops.aten.copy_.default(arg630_1, getitem_38); arg630_1 = getitem_38 = copy__482 = None\n copy__483 = torch.ops.aten.copy_.default(arg631_1, getitem_39); arg631_1 = getitem_39 = copy__483 = None\n copy__484 = torch.ops.aten.copy_.default(arg632_1, getitem_40); arg632_1 = getitem_40 = copy__484 = None\n copy__485 = torch.ops.aten.copy_.default(arg633_1, getitem_41); arg633_1 = getitem_41 = copy__485 = None\n copy__486 = torch.ops.aten.copy_.default(arg634_1, getitem_42); arg634_1 = getitem_42 = copy__486 = None\n copy__487 = torch.ops.aten.copy_.default(arg635_1, getitem_43); arg635_1 = getitem_43 = copy__487 = None\n copy__488 = torch.ops.aten.copy_.default(arg636_1, getitem_44); arg636_1 = getitem_44 = copy__488 = None\n copy__489 = torch.ops.aten.copy_.default(arg637_1, getitem_45); arg637_1 = getitem_45 = copy__489 = None\n copy__490 = torch.ops.aten.copy_.default(arg638_1, getitem_46); arg638_1 = getitem_46 = copy__490 = None\n copy__491 = torch.ops.aten.copy_.default(arg639_1, getitem_47); arg639_1 = getitem_47 = copy__491 = None\n copy__492 = torch.ops.aten.copy_.default(arg640_1, getitem_48); arg640_1 = getitem_48 = copy__492 = None\n 
copy__493 = torch.ops.aten.copy_.default(arg641_1, getitem_49); arg641_1 = getitem_49 = copy__493 = None\n copy__494 = torch.ops.aten.copy_.default(arg642_1, getitem_50); arg642_1 = getitem_50 = copy__494 = None\n copy__495 = torch.ops.aten.copy_.default(arg643_1, getitem_51); arg643_1 = getitem_51 = copy__495 = None\n copy__496 = torch.ops.aten.copy_.default(arg644_1, getitem_52); arg644_1 = getitem_52 = copy__496 = None\n copy__497 = torch.ops.aten.copy_.default(arg645_1, getitem_53); arg645_1 = getitem_53 = copy__497 = None\n copy__498 = torch.ops.aten.copy_.default(arg646_1, getitem_54); arg646_1 = getitem_54 = copy__498 = None\n copy__499 = torch.ops.aten.copy_.default(arg647_1, getitem_55); arg647_1 = getitem_55 = copy__499 = None\n copy__500 = torch.ops.aten.copy_.default(arg648_1, getitem_56); arg648_1 = getitem_56 = copy__500 = None\n copy__501 = torch.ops.aten.copy_.default(arg649_1, getitem_57); arg649_1 = getitem_57 = copy__501 = None\n copy__502 = torch.ops.aten.copy_.default(arg650_1, getitem_58); arg650_1 = getitem_58 = copy__502 = None\n copy__503 = torch.ops.aten.copy_.default(arg651_1, getitem_59); arg651_1 = getitem_59 = copy__503 = None\n copy__504 = torch.ops.aten.copy_.default(arg652_1, getitem_60); arg652_1 = getitem_60 = copy__504 = None\n copy__505 = torch.ops.aten.copy_.default(arg653_1, getitem_61); arg653_1 = getitem_61 = copy__505 = None\n copy__506 = torch.ops.aten.copy_.default(arg654_1, getitem_62); arg654_1 = getitem_62 = copy__506 = None\n copy__507 = torch.ops.aten.copy_.default(arg655_1, getitem_63); arg655_1 = getitem_63 = copy__507 = None\n copy__508 = torch.ops.aten.copy_.default(arg656_1, getitem_64); arg656_1 = getitem_64 = copy__508 = None\n copy__509 = torch.ops.aten.copy_.default(arg657_1, getitem_65); arg657_1 = getitem_65 = copy__509 = None\n copy__510 = torch.ops.aten.copy_.default(arg658_1, getitem_66); arg658_1 = getitem_66 = copy__510 = None\n copy__511 = torch.ops.aten.copy_.default(arg659_1, getitem_67); arg659_1 = getitem_67 = copy__511 = None\n copy__512 = torch.ops.aten.copy_.default(arg660_1, getitem_68); arg660_1 = getitem_68 = copy__512 = None\n copy__513 = torch.ops.aten.copy_.default(arg661_1, getitem_69); arg661_1 = getitem_69 = copy__513 = None\n copy__514 = torch.ops.aten.copy_.default(arg662_1, getitem_70); arg662_1 = getitem_70 = copy__514 = None\n copy__515 = torch.ops.aten.copy_.default(arg663_1, getitem_71); arg663_1 = getitem_71 = copy__515 = None\n copy__516 = torch.ops.aten.copy_.default(arg664_1, getitem_72); arg664_1 = getitem_72 = copy__516 = None\n copy__517 = torch.ops.aten.copy_.default(arg665_1, getitem_73); arg665_1 = getitem_73 = copy__517 = None\n copy__518 = torch.ops.aten.copy_.default(arg666_1, getitem_74); arg666_1 = getitem_74 = copy__518 = None\n copy__519 = torch.ops.aten.copy_.default(arg667_1, getitem_75); arg667_1 = getitem_75 = copy__519 = None\n copy__520 = torch.ops.aten.copy_.default(arg668_1, getitem_76); arg668_1 = getitem_76 = copy__520 = None\n copy__521 = torch.ops.aten.copy_.default(arg669_1, getitem_77); arg669_1 = getitem_77 = copy__521 = None\n copy__522 = torch.ops.aten.copy_.default(arg670_1, getitem_78); arg670_1 = getitem_78 = copy__522 = None\n copy__523 = torch.ops.aten.copy_.default(arg671_1, getitem_79); arg671_1 = getitem_79 = copy__523 = None\n copy__524 = torch.ops.aten.copy_.default(arg672_1, getitem_80); arg672_1 = getitem_80 = copy__524 = None\n copy__525 = torch.ops.aten.copy_.default(arg673_1, getitem_81); arg673_1 = getitem_81 = copy__525 = None\n copy__526 = 
torch.ops.aten.copy_.default(arg674_1, getitem_82); arg674_1 = getitem_82 = copy__526 = None\n copy__527 = torch.ops.aten.copy_.default(arg675_1, getitem_83); arg675_1 = getitem_83 = copy__527 = None\n copy__528 = torch.ops.aten.copy_.default(arg676_1, getitem_84); arg676_1 = getitem_84 = copy__528 = None\n copy__529 = torch.ops.aten.copy_.default(arg677_1, getitem_85); arg677_1 = getitem_85 = copy__529 = None\n copy__530 = torch.ops.aten.copy_.default(arg678_1, getitem_86); arg678_1 = getitem_86 = copy__530 = None\n copy__531 = torch.ops.aten.copy_.default(arg679_1, getitem_87); arg679_1 = getitem_87 = copy__531 = None\n copy__532 = torch.ops.aten.copy_.default(arg680_1, getitem_88); arg680_1 = getitem_88 = copy__532 = None\n copy__533 = torch.ops.aten.copy_.default(arg681_1, getitem_89); arg681_1 = getitem_89 = copy__533 = None\n copy__534 = torch.ops.aten.copy_.default(arg682_1, getitem_90); arg682_1 = getitem_90 = copy__534 = None\n copy__535 = torch.ops.aten.copy_.default(arg683_1, getitem_91); arg683_1 = getitem_91 = copy__535 = None\n copy__536 = torch.ops.aten.copy_.default(arg684_1, getitem_92); arg684_1 = getitem_92 = copy__536 = None\n copy__537 = torch.ops.aten.copy_.default(arg685_1, getitem_93); arg685_1 = getitem_93 = copy__537 = None\n copy__538 = torch.ops.aten.copy_.default(arg686_1, getitem_94); arg686_1 = getitem_94 = copy__538 = None\n copy__539 = torch.ops.aten.copy_.default(arg687_1, getitem_95); arg687_1 = getitem_95 = copy__539 = None\n copy__540 = torch.ops.aten.copy_.default(arg688_1, getitem_96); arg688_1 = getitem_96 = copy__540 = None\n copy__541 = torch.ops.aten.copy_.default(arg689_1, getitem_97); arg689_1 = getitem_97 = copy__541 = None\n copy__542 = torch.ops.aten.copy_.default(arg690_1, getitem_98); arg690_1 = getitem_98 = copy__542 = None\n copy__543 = torch.ops.aten.copy_.default(arg691_1, getitem_99); arg691_1 = getitem_99 = copy__543 = None\n copy__544 = torch.ops.aten.copy_.default(arg692_1, getitem_100); arg692_1 = getitem_100 = copy__544 = None\n copy__545 = torch.ops.aten.copy_.default(arg693_1, getitem_101); arg693_1 = getitem_101 = copy__545 = None\n copy__546 = torch.ops.aten.copy_.default(arg694_1, getitem_102); arg694_1 = getitem_102 = copy__546 = None\n copy__547 = torch.ops.aten.copy_.default(arg695_1, getitem_103); arg695_1 = getitem_103 = copy__547 = None\n copy__548 = torch.ops.aten.copy_.default(arg696_1, getitem_104); arg696_1 = getitem_104 = copy__548 = None\n copy__549 = torch.ops.aten.copy_.default(arg697_1, getitem_105); arg697_1 = getitem_105 = copy__549 = None\n copy__550 = torch.ops.aten.copy_.default(arg698_1, getitem_106); arg698_1 = getitem_106 = copy__550 = None\n copy__551 = torch.ops.aten.copy_.default(arg699_1, getitem_107); arg699_1 = getitem_107 = copy__551 = None\n copy__552 = torch.ops.aten.copy_.default(arg700_1, getitem_108); arg700_1 = getitem_108 = copy__552 = None\n copy__553 = torch.ops.aten.copy_.default(arg701_1, getitem_109); arg701_1 = getitem_109 = copy__553 = None\n copy__554 = torch.ops.aten.copy_.default(arg702_1, getitem_110); arg702_1 = getitem_110 = copy__554 = None\n copy__555 = torch.ops.aten.copy_.default(arg703_1, getitem_111); arg703_1 = getitem_111 = copy__555 = None\n copy__556 = torch.ops.aten.copy_.default(arg704_1, getitem_112); arg704_1 = getitem_112 = copy__556 = None\n copy__557 = torch.ops.aten.copy_.default(arg705_1, getitem_113); arg705_1 = getitem_113 = copy__557 = None\n copy__558 = torch.ops.aten.copy_.default(arg706_1, getitem_114); arg706_1 = getitem_114 = copy__558 = None\n 
copy__559 = torch.ops.aten.copy_.default(arg707_1, getitem_115); arg707_1 = getitem_115 = copy__559 = None\n copy__560 = torch.ops.aten.copy_.default(arg708_1, getitem_116); arg708_1 = getitem_116 = copy__560 = None\n copy__561 = torch.ops.aten.copy_.default(arg709_1, getitem_117); arg709_1 = getitem_117 = copy__561 = None\n copy__562 = torch.ops.aten.copy_.default(arg710_1, getitem_118); arg710_1 = getitem_118 = copy__562 = None\n copy__563 = torch.ops.aten.copy_.default(arg711_1, getitem_119); arg711_1 = getitem_119 = copy__563 = None\n copy__564 = torch.ops.aten.copy_.default(arg712_1, getitem_120); arg712_1 = getitem_120 = copy__564 = None\n copy__565 = torch.ops.aten.copy_.default(arg713_1, getitem_121); arg713_1 = getitem_121 = copy__565 = None\n copy__566 = torch.ops.aten.copy_.default(arg714_1, getitem_122); arg714_1 = getitem_122 = copy__566 = None\n copy__567 = torch.ops.aten.copy_.default(arg715_1, getitem_123); arg715_1 = getitem_123 = copy__567 = None\n copy__568 = torch.ops.aten.copy_.default(arg716_1, getitem_124); arg716_1 = getitem_124 = copy__568 = None\n copy__569 = torch.ops.aten.copy_.default(arg717_1, getitem_125); arg717_1 = getitem_125 = copy__569 = None\n copy__570 = torch.ops.aten.copy_.default(arg718_1, getitem_126); arg718_1 = getitem_126 = copy__570 = None\n copy__571 = torch.ops.aten.copy_.default(arg719_1, getitem_127); arg719_1 = getitem_127 = copy__571 = None\n copy__572 = torch.ops.aten.copy_.default(arg720_1, getitem_128); arg720_1 = getitem_128 = copy__572 = None\n copy__573 = torch.ops.aten.copy_.default(arg721_1, getitem_129); arg721_1 = getitem_129 = copy__573 = None\n copy__574 = torch.ops.aten.copy_.default(arg722_1, getitem_130); arg722_1 = getitem_130 = copy__574 = None\n copy__575 = torch.ops.aten.copy_.default(arg723_1, getitem_131); arg723_1 = getitem_131 = copy__575 = None\n copy__576 = torch.ops.aten.copy_.default(arg724_1, getitem_132); arg724_1 = getitem_132 = copy__576 = None\n copy__577 = torch.ops.aten.copy_.default(arg725_1, getitem_133); arg725_1 = getitem_133 = copy__577 = None\n copy__578 = torch.ops.aten.copy_.default(arg726_1, getitem_134); arg726_1 = getitem_134 = copy__578 = None\n copy__579 = torch.ops.aten.copy_.default(arg727_1, getitem_135); arg727_1 = getitem_135 = copy__579 = None\n copy__580 = torch.ops.aten.copy_.default(arg728_1, getitem_136); arg728_1 = getitem_136 = copy__580 = None\n copy__581 = torch.ops.aten.copy_.default(arg729_1, getitem_137); arg729_1 = getitem_137 = copy__581 = None\n copy__582 = torch.ops.aten.copy_.default(arg730_1, getitem_138); arg730_1 = getitem_138 = copy__582 = None\n copy__583 = torch.ops.aten.copy_.default(arg731_1, getitem_139); arg731_1 = getitem_139 = copy__583 = None\n copy__584 = torch.ops.aten.copy_.default(arg732_1, getitem_140); arg732_1 = getitem_140 = copy__584 = None\n copy__585 = torch.ops.aten.copy_.default(arg733_1, getitem_141); arg733_1 = getitem_141 = copy__585 = None\n copy__586 = torch.ops.aten.copy_.default(arg734_1, getitem_142); arg734_1 = getitem_142 = copy__586 = None\n copy__587 = torch.ops.aten.copy_.default(arg735_1, getitem_143); arg735_1 = getitem_143 = copy__587 = None\n copy__588 = torch.ops.aten.copy_.default(arg736_1, getitem_144); arg736_1 = getitem_144 = copy__588 = None\n copy__589 = torch.ops.aten.copy_.default(arg737_1, getitem_145); arg737_1 = getitem_145 = copy__589 = None\n copy__590 = torch.ops.aten.copy_.default(arg738_1, getitem_146); arg738_1 = getitem_146 = copy__590 = None\n copy__591 = torch.ops.aten.copy_.default(arg739_1, getitem_147); 
arg739_1 = getitem_147 = copy__591 = None\n return ()\n \n# To see more debug info, please use `graph_module.print_readable()`", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[3]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[4]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[5]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[6]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[7]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[8]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[9]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[10]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[11]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[12]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[13]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[14]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[15]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[16]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[17]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[18]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[19]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[20]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[21]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[22]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[23]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[24]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[25]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[26]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[27]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[28]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[29]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[30]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[31]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[32]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[33]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[34]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[35]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[36]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[37]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[38]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[39]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[40]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[41]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[42]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[43]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[44]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[45]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[46]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[47]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[48]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[49]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[50]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[51]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[52]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[53]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[54]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[55]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[56]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[57]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[58]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[59]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[60]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[61]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[62]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[63]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[64]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[65]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[66]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[67]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[68]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[69]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[70]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[71]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[72]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[73]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[74]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[75]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[76]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[77]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[78]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[79]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[80]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[81]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[82]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[83]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[84]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[85]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[86]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[87]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[88]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[89]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[90]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[91]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[92]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[93]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[94]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[95]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[96]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[97]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[98]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[99]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[100]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[101]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[102]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[103]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[104]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[105]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[106]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[107]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[108]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[109]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[110]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[111]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[112]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[113]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[114]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[115]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[116]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[117]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[118]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[119]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[120]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[121]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[122]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[123]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[124]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[125]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', 
index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[126]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[127]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[128]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[129]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[130]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[131]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[132]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[133]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[134]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[135]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[136]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[137]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[138]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[139]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[140]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[141]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[142]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[143]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[144]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[145]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[146]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[147]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[148]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[149]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[j2qdbepspnv5xu5aaa4vfybenn6kcipqnlg4axmhoebbdlhdfcg] example_inputs[150]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3145728, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ucvyfteusmf3hkyqsnlp5ug5dh4kqnbxlr56s7pvkidgpyg2jx5] example_inputs[151]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6gu2nloxzcxrfltpbua7lqp7kkjihxd4w7afynotijqr3v7nhh5] example_inputs[152]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1024, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[153]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[154]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[155]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[156]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[157]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[158]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[159]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[160]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[161]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[162]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[163]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[164]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[165]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[166]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[167]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[168]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[169]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[170]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[171]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[172]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[173]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[174]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[175]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[176]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[177]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[178]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[179]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[180]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[181]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[182]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[183]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[184]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[185]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[186]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[187]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[188]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[189]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[190]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[191]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[192]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[193]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[194]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[195]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[196]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[197]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[198]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[199]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[200]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[201]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[202]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[203]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[204]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[205]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[206]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[207]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[208]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[209]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[210]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[211]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[212]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[213]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[214]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[215]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[216]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[217]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[218]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[219]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[220]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[221]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[222]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[223]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[224]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[225]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[226]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[227]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[228]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[229]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[230]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[231]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[232]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[233]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[234]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[235]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[236]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[237]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[238]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[239]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[240]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[241]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[242]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[243]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[244]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[245]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[246]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[247]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[248]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[249]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[250]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[251]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[252]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[253]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[254]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[255]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[256]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[257]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[258]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[259]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[260]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[261]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[262]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[263]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[264]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[265]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[266]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[267]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[268]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[269]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[270]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[271]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[272]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[273]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[274]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[275]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[276]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[277]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[278]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[279]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[280]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[281]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[282]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[283]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[284]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[285]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[286]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hrasck5lnskaq7gv4vyfi44p5n7y3tr36z7trckbwnuv3eyapmk] example_inputs[287]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6to4nwahrkxgsie75pfivv3ndqufs4lughzpqcdhir3tm3ezo3n] example_inputs[288]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[2ltp76oexmn4iaroifj5ea33qciy4yxhn7j6tpopk3n2hntts7a] example_inputs[289]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[290]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), 
device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[291]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[292]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ci5juyeynyazk6a6gqle7pthpejmypuopbxaghaccdz2d3kpsz4] example_inputs[293]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[wzhy2ewb5m2s7i22j2cam7aomq3hpahzofgjnxkb6jug7de7qgw] example_inputs[294]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[3tccso7i75mxvdsbabjcgf45nen6thm6hxdkqvudhrh5iu2kbs7] example_inputs[295]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[296]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[297]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ln6xprjugjuaphagzxcpw3ly7ocdvczoqxk4ltvl3onfxvve4on] example_inputs[298]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[299]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[300]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[301]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[302]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[303]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[304]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[305]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[306]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[307]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[308]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[309]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[310]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[311]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[312]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[313]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[314]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[315]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[316]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[317]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[318]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[319]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[320]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[321]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[322]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[323]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[324]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[325]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[326]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[327]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[328]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[329]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[330]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[331]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[332]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[333]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[334]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[335]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[336]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[337]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[338]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[339]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[340]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[341]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[342]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[343]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[344]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[345]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[346]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[347]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[348]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[349]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[350]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[351]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[352]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] 
example_inputs[353]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[354]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[355]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[356]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[357]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[358]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[359]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[360]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[361]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[362]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[363]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[364]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[365]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[366]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[367]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[368]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[369]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[370]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[371]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[372]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[373]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[374]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[375]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[376]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[377]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[378]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[379]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[380]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[381]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[382]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[383]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[384]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[385]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[386]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[387]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[388]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[389]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[390]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[391]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[392]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[393]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[394]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[395]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[396]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[397]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[398]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[399]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[400]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[401]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[402]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[403]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[404]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[405]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[406]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[407]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[408]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[409]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[410]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[411]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[412]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[413]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[414]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[415]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[416]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[417]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[418]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[419]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[420]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[421]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[422]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[423]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[424]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[425]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[426]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[427]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[428]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[429]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[430]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[431]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[432]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[433]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[434]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[435]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[436]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[437]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[438]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[439]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[440]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[441]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[442]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[443]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[444]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[445]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[l36txgwatro2oumaemq3fn7fzd6c7rqt7jkya4whofwgykwhyac] example_inputs[446]: TensorMetadata(dtype=torch.float32, shape=torch.Size([50304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=154533888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[447]: TensorMetadata(dtype=torch.float32, 
shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[448]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[449]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[450]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[451]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[452]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[453]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[454]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] 
example_inputs[455]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[456]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[457]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[458]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[459]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[460]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[461]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[462]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[463]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[464]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[465]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[466]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[467]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[468]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[469]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[470]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[471]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[472]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[473]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[474]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[475]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[476]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[477]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[478]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[479]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[480]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[481]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[482]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[483]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[484]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[485]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[486]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[487]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[488]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[489]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[490]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[491]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[492]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[493]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[494]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[495]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[496]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[497]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[498]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[499]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[500]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[501]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[502]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[503]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[504]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[505]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[506]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[507]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[508]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[509]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[510]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[511]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[512]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[513]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[514]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[515]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[516]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[517]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[518]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[519]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[520]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[521]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[522]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[523]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[524]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[525]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[526]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[527]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[528]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[529]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[530]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[531]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[532]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[533]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, 
storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[534]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[535]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[536]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[537]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[538]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[539]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[540]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[541]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[542]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[543]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[544]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[545]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[546]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[547]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[548]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[549]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), 
stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[550]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[551]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[552]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[553]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[554]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[555]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[556]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[557]: 
TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[558]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[559]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[560]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[561]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[562]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[563]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[564]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[565]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[566]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[567]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[568]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[569]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[570]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[571]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[572]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, 
is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[573]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[574]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[575]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[576]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[577]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[578]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[579]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[580]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, 
is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ku6bnu4u7osvly4jsmf2w2orf22tghfytwzq6ycj3otmnq6f3wn] example_inputs[581]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=7077888, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[57h3bzk67jreuqiikhvgdtcn2mdkz7xuwhdnrjueg4dxxn6tfzm] example_inputs[582]: TensorMetadata(dtype=torch.float32, shape=torch.Size([2304]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9216, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[a64xnvbb6q6gew5c3aovaeqivj3hstrmxkbra3eidivx42znnuv] example_inputs[583]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=2359296, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[584]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[585]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[586]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[aafg74anlk7yqmlg6ucup4tmjwhskblh75z227dcfmwpfzw7dzm] example_inputs[587]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072, 768]), stride=(768, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[sefiqkkteehwsnilwpljkx7vdgondub2hwggoishzmx7o5etmjk] example_inputs[588]: TensorMetadata(dtype=torch.float32, shape=torch.Size([3072]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=12288, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[6v4azk5hvrkyq22k6mfa2l7ovaddatmim7mcvcpdku2ieirjd64] example_inputs[589]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768, 3072]), stride=(3072, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=9437184, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[590]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[591]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[ymueslj5cs5k2l243gcdaohltqzpminou4x5gltmu2cgjgroyyd] example_inputs[592]: TensorMetadata(dtype=torch.float32, shape=torch.Size([768]), stride=(1,), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=3072, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[593]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[594]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[595]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[596]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[597]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[598]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[599]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[600]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[601]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[602]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[603]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[604]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, 
memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[605]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[606]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[607]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[608]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[609]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[610]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[611]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[612]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, 
storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[613]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[614]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[615]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[616]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[617]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[618]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[619]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[620]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, 
requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[621]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[622]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[623]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[624]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[625]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[626]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[627]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[628]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, 
is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[629]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[630]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[631]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[632]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[633]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[634]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[635]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[636]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, 
is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[637]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[638]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[639]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[640]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[641]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[642]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[643]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[644]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, 
is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[645]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[646]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[647]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[648]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[649]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[650]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[651]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[652]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, 
sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[653]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[654]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[655]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[656]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[657]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[658]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[659]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[660]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[661]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[662]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[663]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[664]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[665]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[666]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[667]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[668]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[669]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[670]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[671]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[672]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[673]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[674]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[675]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[676]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[677]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[678]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[679]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[680]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[681]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[682]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[683]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[684]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[685]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[686]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[687]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[688]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[689]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[690]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[691]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[692]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[693]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[694]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[695]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[696]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[697]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[698]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[699]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[700]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[701]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[702]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[703]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[704]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[705]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[706]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[707]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[708]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[709]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[710]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[711]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[712]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[713]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[714]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[715]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[716]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[717]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[718]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[719]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[720]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[721]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[722]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[723]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[724]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[725]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[726]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[727]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[728]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[729]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[730]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[731]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[732]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", 
"[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[733]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[734]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[735]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[736]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[737]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[738]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[iyogpwfek6zjggzelzpjeon4muycgzpe56o5gt3flsg5vojztet] example_inputs[739]: TensorMetadata(dtype=torch.float32, shape=torch.Size([]), stride=(), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=4, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[moqyx73pd52qgfefpqfwjkxjzf4hj25dyzowq4prptxxsmfw2vg] fx_kwargs[cudagraphs]: BoxedBool(value=True)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[t755of6lmsc7np3j6spka2x5yvicie732qv4wx6uu67rphf6elu] fx_kwargs[static_input_idxs]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739]", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] fx_kwargs[user_visible_outputs]: {}", "[5gxbt6glr3gti63xp7cch6ofdqfxvag7hsiwxbut4if4xrx6d4g] inputs_to_check[0]: 151", "[iaarrh4a5kr5bv73kkmrhkca4ysumu2vh65kdzworlx74re2dpo] inputs_to_check[1]: 152", "[qmhmrekvpai4tc7rejm43nkaq3mgt2cy6w6mzkdg2pdzt4xbwx2] inputs_to_check[2]: 153", "[qrczcfquzsvwccgmqugssyaib555w6hfdt4shqzvb6brfm3i5el] inputs_to_check[3]: 154", 
"[6osxch7or66drdp4fy276u75unoezfbp32qtojzfami7nbek767] inputs_to_check[4]: 155", "[o3fk4nlt4btpkqljmyxroatirrdhqjr4d264i2tywc6raomfjsw] inputs_to_check[5]: 156", "[33b7c6n5un2rbt5kdgtbsh6c64len6cjzmios66nja6gia4ojcx] inputs_to_check[6]: 157", "[ccishwh2vlgdi4q6qdu3en4gukgptvbrqyx5rofx72wz3bicnzy] inputs_to_check[7]: 158", "[6pn6oydkkil5wvbpu5uvdffsyymbzhxx3t2skamg4wp5vtb3n5k] inputs_to_check[8]: 159", "[alex6ca6gpzizomfu3wq3xj36jnymygy7hiroowxhaypic6tskv] inputs_to_check[9]: 160", "[bivqezf4ymabhorni5gd4fe3urab3kvepwenq5gmvosf2pavdd5] inputs_to_check[10]: 161", "[asq5khhnfffkvmnnmgeoqsye4l64y7kkvyk3snk2tyrgf2qb5mi] inputs_to_check[11]: 162", "[wz2lj7dbnoawsawyjhobr6oa52jgjntn3o3lmrtkb4bmzfapwhc] inputs_to_check[12]: 163", "[ngbs2fex7zmmncfaogkkrqimpfgq4wjlkqmf3tcyeswwe3hg3od] inputs_to_check[13]: 164", "[fssplvrotxdu6guo4defun7h45ns624vjjonloaz7etd7ggxjg4] inputs_to_check[14]: 165", "[sc6cvy6nxzmjuelrp2whvlhran2f4f6elzbuslbei3dfnaq6qkv] inputs_to_check[15]: 166", "[p44ecp3xwjohf6mop4nzmt5wxi7uzcii63xm5kryejmipxkfjev] inputs_to_check[16]: 167", "[xngi5shtw7jcqe2utazf35f7dj5ypzfuduqyypjo6jvv32knjf3] inputs_to_check[17]: 168", "[niz4gr5x3ya3ukuhf4c7rnhhrswyan3kbzclc6g52u3kvurqmf4] inputs_to_check[18]: 169", "[bfsiwqbrjfxsnzozsohdqmqjqwa5itj3abqdtdwxf7vxxw2t7iq] inputs_to_check[19]: 170", "[ditguzdbhtwz6l4chqi3z6cziyi2pct43zhzf5zaragfhqf5akd] inputs_to_check[20]: 171", "[e3h4w2vfojacabco57r6aj43dgqrhfx545yrdu7qzxkvjyf52t5] inputs_to_check[21]: 172", "[mfhhdultow3ovihpso6dtsdpntivwjcpfismlaqwsjc6iainhtu] inputs_to_check[22]: 173", "[kpyorpxsozia3z5wqrbnqnnff6bma7xllpbedgwp4gp76wwwegc] inputs_to_check[23]: 174", "[lvgyjizlzse4yknfjmvl2uqg4ffygwpuddjgunbxcbjm5g5d5fw] inputs_to_check[24]: 175", "[qgbusewwprhncdk5ahq4fytqvx7fhrecfetchddqudoq7lkyx3b] inputs_to_check[25]: 176", "[hmzhjnkpczbvqnshjcfp45u74kngsbufwspp5fa5e4zidpcwwcs] inputs_to_check[26]: 177", "[oqg53nqin266dynz7o2z27qo3hcxucd5yc3or2656uoqs6quan2] inputs_to_check[27]: 178", "[k6swhf5ty2zmij2rxhvvpe7jwdxx6llqawhiqixvpvjgc6dhkl6] inputs_to_check[28]: 179", "[a5s6scmr5zbjnm5a6isxptkuttpfh2uv2g7tza7ci6z366qvhph] inputs_to_check[29]: 180", "[ma4hmi2zhz2v5pv6nw7owyw3z3lswrc5nveayyb3j5iastoc7zl] inputs_to_check[30]: 181", "[4nwllnnjxh7swihocoe7pvsijtlpzpz6yjwckaahmgpaeimozt7] inputs_to_check[31]: 182", "[wzkita6x3wixk2x6zsfnpzfqrfaezroxu5xnw7rq7ox7vqgu476] inputs_to_check[32]: 183", "[xrjqiropfaxwzdwzzhyx6e6c7hjnkppzronjqgpzo6hbpi6yr4g] inputs_to_check[33]: 184", "[urai455awfpx3bjmqadnlbnbklixi676brusjoruximpnjo76n2] inputs_to_check[34]: 185", "[eermw7oa3whtf4qdg6cogh7s3xez3tfiest32uplhnwmagpouh6] inputs_to_check[35]: 186", "[64xsnjmjbzfylxidk5yl6hsvuzswofeqbq3zuv7a6bkdopfeapn] inputs_to_check[36]: 187", "[6kuevh47g3elelxoo5ac7cmo3r2fh2ygbhs6qyljspkz4y6r7r7] inputs_to_check[37]: 188", "[ihnc7ngbkirbzwxoyjfhpwki2ewnnpkuzxlegp6fmw6fykdxxj3] inputs_to_check[38]: 189", "[g3ay2xbjws2ov73c4lkobfibuq4wxwxe75uogzdg7crgtzlagn5] inputs_to_check[39]: 190", "[6lydlqaer6b3qvlthv7uluevii5gvxgissp4oodsoye43zyvm7f] inputs_to_check[40]: 191", "[mrm72xpjwecc4eczy6w3ndrca6qgx4ssucludsfllsadesjz7pl] inputs_to_check[41]: 192", "[7cz46kewyqtcfh7adjmk5i2ljoq5v44ofijq4gmlca7gjy55c6r] inputs_to_check[42]: 193", "[2ubcxo5fpwyipcg26qkwk7dfk4ci2edpvwxh4fhvgvstq4cf3ke] inputs_to_check[43]: 194", "[idxzwtb6yotm5u6qhotbqxbuytqogl4lyuzcp4f4rpiekingxrj] inputs_to_check[44]: 195", "[sxompataxg2kpp6lvmimnzadenerjisuicfkfuwm5exoinhfbsg] inputs_to_check[45]: 196", "[5pmk7sv27s2bi54s3kwyduqs4ly256qfb2hfrlqfamtcsbur3iv] inputs_to_check[46]: 197", 
"[pujj5ix4dbdajeweoew7fe743v6v6wscq7k7pjsqiqopewlh6s4] inputs_to_check[47]: 198", "[tfk4gvmeljn6oc7yzg7ablm5slfgj5iwldvib27lgy3acro6g77] inputs_to_check[48]: 199", "[3yqkxangefsazunaw2ibltnnexjixpvosdxyq7kipwrmhng4d66] inputs_to_check[49]: 200", "[dek4vtwl3t4tioy2oedefor7hqzq7doc3fj4wwdmgrfpt773mvr] inputs_to_check[50]: 201", "[eyzompn7rqbpbwprodhvszb4fjs3fubclamjylwqsna5imftnou] inputs_to_check[51]: 202", "[cna2jzzfijl2grhnqpag2peenci7zfourhgcdzidromdrqdyvwm] inputs_to_check[52]: 203", "[m5mnhtreky3cpmvgnfmbkri3pmhs22tu3kahhkdxv2q67t7rtxk] inputs_to_check[53]: 204", "[pf3yxn6pwjw3apolzviv77ube4xeqq2n2lgwcduyjvzgiyxg45s] inputs_to_check[54]: 205", "[zlqnl55vmxcplhlix7khtasmq2gdecqd7jpore57pll2by4u67y] inputs_to_check[55]: 206", "[6u3htmimfebyyyavsbzctid7bqe3p2vzitaht7rhqdc6l653asq] inputs_to_check[56]: 207", "[vnxrp3cswdykkkdcda2rykgrj6p7mbsgoq2euf3nhebgbrbdnah] inputs_to_check[57]: 208", "[xkuyzvn72atoye7xvdr7nkkl6r43muegtld7i23uic3gez4op3w] inputs_to_check[58]: 209", "[mc4hinl3b4abbhnnd6kjw3mpbdnhcszhce746aznurtp7rckvqe] inputs_to_check[59]: 210", "[ofm6prxr6hqz2u5z3oywwp5635di572xearfxgeqikq7ir5zyer] inputs_to_check[60]: 211", "[ib352syzxfoxetnwcwmr562kq6zxh3ba6k6ozr7vmgirzngz445] inputs_to_check[61]: 212", "[z7vzn4qm5gv2ec4zm5oa552msg56z4an6jyi43vpqrh6rcwtww5] inputs_to_check[62]: 213", "[6ijaweudkgtayjajytjpkgptbxqygprffq2iv65twukqg2ks24j] inputs_to_check[63]: 214", "[zr4ja2xbcw5fkklyjkk4dqkrixbqthrmy4gx6wolog3g6twxagb] inputs_to_check[64]: 215", "[f7rs6g77lmqs6rceoayty7ukws77rxwoi2litnshxyvn2l4qcrs] inputs_to_check[65]: 216", "[iq4ks6jqbobe4ub7eedwgwhm6u7dujn4pdk6xtzlsutw35gihbh] inputs_to_check[66]: 217", "[c4tbwksvxlxtiymirqu6houav6ecq5pr3zf4phc5ksuu6ccao5t] inputs_to_check[67]: 218", "[i3gn36xaneuwkxpjtyp3iiaeudseihrk47h5len33wjeyzt6ez5] inputs_to_check[68]: 219", "[2hfliifbgstom3wq5au6yesetrbhjmazl3j2z7o4wvztaigyd7p] inputs_to_check[69]: 220", "[jsavgbo66sdqebklxk3p76jjgnvror75kkpwbfrr2grkuudknxt] inputs_to_check[70]: 221", "[nl6eg6mm4g66lxwzopu5webjhh4pq52imycgdnbzw5sdafpfzuh] inputs_to_check[71]: 222", "[uulsqibga6yzqtpej2uf4km34ygpbdt4gya2hth26yioguk4rlq] inputs_to_check[72]: 223", "[ptaz43zzcc5wqnllxp3fe7pvbo75xmcjlabttblccy46hygeozy] inputs_to_check[73]: 224", "[wlmb2nn3nq5s77bhgmozz6k5xgxru6empq7wgpphhcgprnpnc7o] inputs_to_check[74]: 225", "[ltksjik3ctg6uqqzzgdjyvza6wpx757hanbg2zoy3qa4k3a4vpx] inputs_to_check[75]: 226", "[7u7ifib46vahqccsmg654f3uzmjbluuqepiqql3s5ozwtzqz7pp] inputs_to_check[76]: 227", "[osag4eozwslt5b7yuzth3lgba32eotdkqtr2kw4ey4i43bgvvrs] inputs_to_check[77]: 228", "[zwdkykxjgt356ykebzld3rzgsfc5zlfk5st4we3ykzkaba3oqex] inputs_to_check[78]: 229", "[iungcnzcibs3necrx3njdt2ckikflhexkoicbep7tvcvtj5ly5d] inputs_to_check[79]: 230", "[bwmghobfcwh2lrdjqskkhe6u3vox2fbz53b65rgrzlmndirmzbm] inputs_to_check[80]: 231", "[qhj52t7zdp3oargrkm4bg6aao35lsfeuhsm5pgevylkqroeb4lc] inputs_to_check[81]: 232", "[bz4ayxadi54u4x7rrse46x6v7bfpgk5qy4scu6kg76fjuxlpy6u] inputs_to_check[82]: 233", "[gw4sqgphdvlxdqktwxjso65pxpymqslnhgr6l4eyswl5izdixw6] inputs_to_check[83]: 234", "[ewdr2tnhafkagyyp46wn5led3h754p4nzttu3w54uhxrqux3vvn] inputs_to_check[84]: 235", "[4e7a3dp2ygblswjy2t4s5ytwvfgeppo6v5xajwfebrzqvxl5c3z] inputs_to_check[85]: 236", "[dnrgqiivi7fu47qt3k5ea43mmzonrx62hvlvj7p4zs2whj47y7d] inputs_to_check[86]: 237", "[pyoye6lh4ebpuzincukqsblo7sz2ok35q5n23bykkflupvr6b3x] inputs_to_check[87]: 238", "[zzav4ck6zf5ii2aadgk45satnvsrcelh7lwasful7siezs7emg2] inputs_to_check[88]: 239", "[jqli3sayeay6jxebdo6gg7uiifocbslkvp3hv7kgpgetcctr6kk] inputs_to_check[89]: 240", 
"[xfscvbmf5xe24dzns4sojux47uhkj3rx5escbpdnh74elnkufk4] inputs_to_check[90]: 241", "[h22xhecdg6loiwavuwtlmkjqgxj23bujpayu7r7cnuwow22zy2e] inputs_to_check[91]: 242", "[ffeweldbvparqtiiuqrj2k57nqdefczqg2x5n36j3cgfubpzl2r] inputs_to_check[92]: 243", "[ccpnfy7cohyrspuggdpbc7wpz4ul2fjni56cul6rgpztmmgrmbz] inputs_to_check[93]: 244", "[wdxh2vlxp5oip6lbdhdc5bur247q4qljosyhafat4nqrygspikw] inputs_to_check[94]: 245", "[imw7rafsd3kl6dhscu76cm37kz7cnlrn4cz4y77hnvu6epvrakh] inputs_to_check[95]: 246", "[s7byzhoacpzsci2aqrvogeb7f5f7egbzqw5uujgvswtjutgh33t] inputs_to_check[96]: 247", "[usr2ovytp5xi5pvkziuuu7bpg7zfa44xckrw7mx42ad2sixrfjw] inputs_to_check[97]: 248", "[spsub4jhn3s5o5rdysoeyv5mqczcmmn3t3hmuflioyvd4im4pfe] inputs_to_check[98]: 249", "[7c6i6h6bfell5u33q6rcv25lpgmk4jah3uhjjx6bjevvjnshoim] inputs_to_check[99]: 250", "[ah4sag2igwizdxkml6voaf342455hrlpr6cesdepe5njv6zahlq] inputs_to_check[100]: 251", "[5svbus4u4wap6a3z767wrjlymc7g7qft4ugaae65e5t7tvvigpv] inputs_to_check[101]: 252", "[6dwei2ltmufaindqw57by4jqhptur5xijexpjttzbqiw5xq7ufb] inputs_to_check[102]: 253", "[r557yxhzgnvuaqxq2y2aisgxqacm3cndl3efydbr3l5u7t6vaao] inputs_to_check[103]: 254", "[ouw6rzfmq6mznqvkp4ouhr3fzo3ljmtrqmyrm3pitapfmmfcq67] inputs_to_check[104]: 255", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inputs_to_check[105]: 256", "[bmucc7bpdzrvbf5petlmybacuupvmc7agiupfu54h73xhqupvaf] inputs_to_check[106]: 257", "[26uxlcwvg3bcy54iimat7oht3bxo7jvlwbqdppzk5zwkoxyv7cb] inputs_to_check[107]: 258", "[6ficbgfpvxoz32hthootgscev5fx7mrus4lxsrrnugdtmomziuz] inputs_to_check[108]: 259", "[4tsez6gljkkxdflgsfn46kmxnkkmjfm3foz2peoj2utbxjdodv2] inputs_to_check[109]: 260", "[677wque2vebz2rd32qmi2owsgvljzdjthgg7zhc67nb7s6z46xw] inputs_to_check[110]: 261", "[m7xjumsvlwcemhzme2dqw2usksuuan5qqnmqjaetwxiar4s4j6c] inputs_to_check[111]: 262", "[paeekzgtlydskvlpjv3e3j2zbrofnrddnfj3b6ronaopjyxkseo] inputs_to_check[112]: 263", "[rwjfz3vqhvl3rixynv6iipvly4la2yaqouw2ax5qrgr43ekinaz] inputs_to_check[113]: 264", "[3rcgr2jikn5dxbda5qrx57cu7lqsoqexxqcfzwsazgqojrjzsua] inputs_to_check[114]: 265", "[uqxikezud5mf2aqkp7seqvyze7hpn2o5c2yez6uydmlvjn2eh5v] inputs_to_check[115]: 266", "[wjcwolwbnxwp5mof62qvgpjq3ar5rzt6kkdytt6wev4udsfqe6x] inputs_to_check[116]: 267", "[veogf6bsf6ikphzxxhjgpzn5jqx3g55t4aoyurw7pm76yiy7zt2] inputs_to_check[117]: 268", "[vjdrwtc6q6dw6qossmadxe3ct3fx342xxf5ebrhb42tbtrjmtoj] inputs_to_check[118]: 269", "[fzi2ibchn6t3srl4xj73y4mywe6m4to7ns3ffl2tsei46mebm5l] inputs_to_check[119]: 270", "[54mteumeehrhr42ajb2mmomf5sssoeewc5jl6nqlnib5ohxb3qd] inputs_to_check[120]: 271", "[yk2ib3hhazfcmbecutfzw356dveofwjm5aokurcon6brky4fv62] inputs_to_check[121]: 272", "[qmgadyz6iluhzeytokynhwx3fiybdqurux3poi3kx7xdkpp24io] inputs_to_check[122]: 273", "[32k7zywxkomlofwth2mo5yip7d4f63vdnnwqucsjvfiviwhzqwj] inputs_to_check[123]: 274", "[ex5ejsi3yiu26ymqgxsfzxfongdoyfsvtn6wbfmjmqan26mywvv] inputs_to_check[124]: 275", "[3r3pihjgebqvjx6yoh4q36k6bmloajxb2wp7mpovqfmbgvtihjx] inputs_to_check[125]: 276", "[jcfhgvbhktjahasdkcg4j22c4iu5wgbqo2by6mvnhkizuyl7adt] inputs_to_check[126]: 277", "[qysgpar3mwuhkwfmkdvwppvqn2rc3wkswoy4l5242blg3s6nomq] inputs_to_check[127]: 278", "[sj5qpjzwfnrnrslghhva6z7fypbpicxjbczgdsscdg7ty6dloke] inputs_to_check[128]: 279", "[dh6oykkvzpw4hh2l2kyq3n3oiaawqasgyps2bki6ouaqwr5o4c5] inputs_to_check[129]: 280", "[bqqqhzw6zzkrdgeg4wed3ge2u7wrxxweyb7ikuugm2lg5bw2low] inputs_to_check[130]: 281", "[lpgee42ktycd2ec7bvvfmts5czoojvy5rglm2fz4boqbzvem3mz] inputs_to_check[131]: 282", "[xxcd2riuuqmc632el2www5z43brah2hzj66qz5c2bl4txi6tphi] inputs_to_check[132]: 
283", "[lzq356tk2daemd3eejrqwmxfuprmzobz2v54vhsfmppeq35midf] inputs_to_check[133]: 284", "[jeqk32o5ugk777bosvm26wli4suonie2j7xeyvcnflm6sh2su4f] inputs_to_check[134]: 285", "[3nrlzlfgqdttgmpwe6ae4donvgjkzv5xalpsx6dkyop7d5e7owg] inputs_to_check[135]: 286", "[2cfsjfkfvrnfsi2dtyhpmzmogddssh6uxfsq3ydka2snuhaqy4s] inputs_to_check[136]: 287", "[d27xrkheycncdo3uzfumqtruedyl7pv2ur7to3lkeg7cjfaawja] inputs_to_check[137]: 288", "[o65ulls4ibkqdqeuckzqgselabavcbzln6kizmseggtkrra6k4k] inputs_to_check[138]: 289", "[e5avxq5la5yhcl3jslzu2qsr4tcolx35t2ujwwjr7lsqbhx2gk3] inputs_to_check[139]: 290", "[cbjwd3zv52u3h7bomxvmf6ynkx4wmtm6bqzzwkzlmyr2ict4kfn] inputs_to_check[140]: 291", "[fpvvfys36hfg7uwq5l6ekyjnvf3tjpbf4d5cxo4webm7epzhpvt] inputs_to_check[141]: 292", "[bhuauv3brrxmr45r7yueymn76n3bwlyfrcrqtsbuok4ipqa5d2q] inputs_to_check[142]: 293", "[n5u6kpqzxtau4hisgec3wulumses6yh323wd6fnttpwm42i3j7x] inputs_to_check[143]: 294", "[rtapjyb4o2hwk4hyf4ep7oeikdbv4zq2ni4dilcfjnjo4sgwzvz] inputs_to_check[144]: 295", "[6aadk5hp6aqszjgpca65txkgn7cp4wttn7o6q4uv5br7qu4ubxq] inputs_to_check[145]: 296", "[ptxf4kphvduiofe7xbem4isrkenfdki3oegb53qdm2jzkbn26ed] inputs_to_check[146]: 297", "[5pxadwvblqbojkxsf7lbkowi52nvhflb3rx456ro3uostqlb7ky] inputs_to_check[147]: 298", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True)", "[7sgsc55kfjzwis4uo5qss3whnvhbsher24yq6kx6s5kpsrkxea6] torch_version: ", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[sp37civic5nfnonkkbzih3xfntpo3gcmphcbajuw565ur3hduzs] system_info[version]: {'triton': '3.0.0+45fff310c8d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-f7c9bcb1b9106f4f2459ef1b70df75315076b76babe2a4ba9e0b3b5a40697757-d412cc506d0c1c7cf83e442e39f90ca1a9dbd4346ecd342591748e57a184b0a3-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-c9cccccc1377e5b8c152e8449fa73a2b3660a899bebac0581c99e4e368a61cde-e82522398a0a43b3c7db9c7fa26ab086260c0981e59942e1df54ddc669d7b78c-cf28658fa328f7f283ec4e6ccc6c48d7c2a8ddbdf5134d3eb35c9b38ce4ace44-b9d80690b3109c2aaf5ece450d62e93b37eb6ab38552089794b3bb36e36a22b3-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-572e6cbc0b9ae0089330aa5d115d10cfc16bd45a69e3ca362c902b9c04b5280b-412a0e8de51f28c2de98b3343804b2949ede2cb593052a0eb0ba3e6a9a1812d0', 'cuda': '12.0'}", "[zapn5tv6636hi2hdgg2j7wqiwxjbrqbqa4vrgrnuli5v34salwn] system_info[hash]: ee3e5029961cf964c22a83f0ca3586fcab4a36649a1478e6293b5382433b23d3", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[abi_compatible]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_dump_consts_bin]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.generate_test_runner]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[halide.scan_kernels]: False", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[post_grad_custom_pre_pass]: None", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[gtkv35cxmtt6tr556buxi277a67g25mjojnv32dc4bjvc7bwscw] inductor_config[pre_grad_fusion_options]: {'batch_linear': {}, 'batch_linear_lhs': {}, 'batch_layernorm': {}, 'batch_tanh': {}, 'batch_relu': {}, 'batch_sigmoid': {}}", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_at_compile_time]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraphs]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_kernel_names]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[hofygoznqmna6yvgsc6itdddi4hxftssgegh6wquixg2yng3a3z] inductor_config[worker_start_method]: subprocess"]} +V0806 13:56:22.388000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "6c36bbd6a016d6ab1b6561b47ca221d0"} + { + "name": "inductor_compile", + "ts": 1722977782388950.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.389000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "5a6ab3e94cda31c941d856ec9820e2cf"} + { + "name": "compile_fx_inner", + "ts": 1722977782389036.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.389000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "06725ec53706eedc69b933b807c07e00"} + { + "name": "compile_fx..fw_compiler_base", + "ts": 1722977782389184.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.392000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "adb5528271cf584b9f24addb2c176a5b"} + { + "name": "create_aot_dispatcher_function", + "ts": 1722977782392701.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.393000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "4a34a3f7c77ee09c5d0a426eab9bc264"} + { + "name": "backend_compile", + "ts": 1722977782393001.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.393000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "de5fc7b04fee8c81075888ee727b085a"} + { + "name": "OutputGraph.call_user_compiler", + "ts": 1722977782393078.2, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.655000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0, "has_payload": "3f414ff92a2c0fbe9a3d2f5d48d6cd73"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor(self) + | | +- ID_MATCH: ___check_obj_id(L['self'], 140561654732528) + | | +- NO_HASATTR: not hasattr(L['self'], 'found_inf') + | | +- NO_HASATTR: not hasattr(L['self'], 'grad_scale') + | | +- DictGuardManager: source=L['self'].state, accessed_by=GetAttrGuardAccessor(state) + | | | +- KeyValueManager pair at index=0 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[0]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['step'], 140561608014112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['self'].param_groups[0]['params'][0], L['self'].param_groups[0]['params'][1], L['self'].param_groups[0]['params'][2], L['self'].param_groups[0]['params'][3], L['self'].param_groups[0]['params'][4], L['self'].param_groups[0]['params'][5], L['self'].param_groups[0]['params'][6], L['self'].param_groups[0]['params'][7], L['self'].param_groups[0]['params'][8], L['self'].param_groups[0]['params'][9], L['self'].param_groups[0]['params'][10], L['self'].param_groups[0]['params'][11], L['self'].param_groups[0]['params'][12], L['self'].param_groups[0]['params'][13], L['self'].param_groups[0]['params'][14], L['self'].param_groups[0]['params'][15], L['self'].param_groups[0]['params'][16], L['self'].param_groups[0]['params'][17], L['self'].param_groups[0]['params'][18], L['self'].param_groups[0]['params'][19], L['self'].param_groups[0]['params'][20], L['self'].param_groups[0]['params'][21], L['self'].param_groups[0]['params'][22], L['self'].param_groups[0]['params'][23], L['self'].param_groups[0]['params'][24], L['self'].param_groups[0]['params'][25], L['self'].param_groups[0]['params'][26], L['self'].param_groups[0]['params'][27], L['self'].param_groups[0]['params'][28], L['self'].param_groups[0]['params'][29], L['self'].param_groups[0]['params'][30], L['self'].param_groups[0]['params'][31], L['self'].param_groups[0]['params'][32], L['self'].param_groups[0]['params'][33], L['self'].param_groups[0]['params'][34], L['self'].param_groups[0]['params'][35], L['self'].param_groups[0]['params'][36], L['self'].param_groups[0]['params'][37], L['self'].param_groups[0]['params'][38], L['self'].param_groups[0]['params'][39], L['self'].param_groups[0]['params'][40], L['self'].param_groups[0]['params'][41], L['self'].param_groups[0]['params'][42], L['self'].param_groups[0]['params'][43], L['self'].param_groups[0]['params'][44], L['self'].param_groups[0]['params'][45], L['self'].param_groups[0]['params'][46], L['self'].param_groups[0]['params'][47], L['self'].param_groups[0]['params'][48], L['self'].param_groups[0]['params'][49], L['self'].param_groups[0]['params'][50], L['self'].param_groups[0]['params'][51], L['self'].param_groups[0]['params'][52], L['self'].param_groups[0]['params'][53], L['self'].param_groups[0]['params'][54], L['self'].param_groups[0]['params'][55], L['self'].param_groups[0]['params'][56], L['self'].param_groups[0]['params'][57], L['self'].param_groups[0]['params'][58], L['self'].param_groups[0]['params'][59], L['self'].param_groups[0]['params'][60], L['self'].param_groups[0]['params'][61], L['self'].param_groups[0]['params'][62], L['self'].param_groups[0]['params'][63], L['self'].param_groups[0]['params'][64], L['self'].param_groups[0]['params'][65], L['self'].param_groups[0]['params'][66], L['self'].param_groups[0]['params'][67], L['self'].param_groups[0]['params'][68], L['self'].param_groups[0]['params'][69], 
L['self'].param_groups[0]['params'][70], L['self'].param_groups[0]['params'][71], L['self'].param_groups[0]['params'][72], L['self'].param_groups[0]['params'][73], L['self'].param_groups[0]['params'][74], L['self'].param_groups[0]['params'][75], L['self'].param_groups[0]['params'][76], L['self'].param_groups[0]['params'][77], L['self'].param_groups[0]['params'][78], L['self'].param_groups[0]['params'][79], L['self'].param_groups[0]['params'][80], L['self'].param_groups[0]['params'][81], L['self'].param_groups[0]['params'][82], L['self'].param_groups[0]['params'][83], L['self'].param_groups[0]['params'][84], L['self'].param_groups[0]['params'][85], L['self'].param_groups[0]['params'][86], L['self'].param_groups[0]['params'][87], L['self'].param_groups[0]['params'][88], L['self'].param_groups[0]['params'][89], L['self'].param_groups[0]['params'][90], L['self'].param_groups[0]['params'][91], L['self'].param_groups[0]['params'][92], L['self'].param_groups[0]['params'][93], L['self'].param_groups[0]['params'][94], L['self'].param_groups[0]['params'][95], L['self'].param_groups[0]['params'][96], L['self'].param_groups[0]['params'][97], L['self'].param_groups[0]['params'][98], L['self'].param_groups[0]['params'][99], L['self'].param_groups[0]['params'][100], L['self'].param_groups[0]['params'][101], L['self'].param_groups[0]['params'][102], L['self'].param_groups[0]['params'][103], L['self'].param_groups[0]['params'][104], L['self'].param_groups[0]['params'][105], L['self'].param_groups[0]['params'][106], L['self'].param_groups[0]['params'][107], L['self'].param_groups[0]['params'][108], L['self'].param_groups[0]['params'][109], L['self'].param_groups[0]['params'][110], L['self'].param_groups[0]['params'][111], L['self'].param_groups[0]['params'][112], L['self'].param_groups[0]['params'][113], L['self'].param_groups[0]['params'][114], L['self'].param_groups[0]['params'][115], L['self'].param_groups[0]['params'][116], L['self'].param_groups[0]['params'][117], L['self'].param_groups[0]['params'][118], L['self'].param_groups[0]['params'][119], L['self'].param_groups[0]['params'][120], L['self'].param_groups[0]['params'][121], L['self'].param_groups[0]['params'][122], L['self'].param_groups[0]['params'][123], L['self'].param_groups[0]['params'][124], L['self'].param_groups[0]['params'][125], L['self'].param_groups[0]['params'][126], L['self'].param_groups[0]['params'][127], L['self'].param_groups[0]['params'][128], L['self'].param_groups[0]['params'][129], L['self'].param_groups[0]['params'][130], L['self'].param_groups[0]['params'][131], L['self'].param_groups[0]['params'][132], L['self'].param_groups[0]['params'][133], L['self'].param_groups[0]['params'][134], L['self'].param_groups[0]['params'][135], L['self'].param_groups[0]['params'][136], L['self'].param_groups[0]['params'][137], L['self'].param_groups[0]['params'][138], L['self'].param_groups[0]['params'][139], L['self'].param_groups[0]['params'][140], L['self'].param_groups[0]['params'][141], L['self'].param_groups[0]['params'][142], L['self'].param_groups[0]['params'][143], L['self'].param_groups[0]['params'][144], L['self'].param_groups[0]['params'][145], L['self'].param_groups[0]['params'][146], L['self'].param_groups[0]['params'][147], L['self'].state[list(L['self'].state.keys())[0]]['step'], L['self'].state[list(L['self'].state.keys())[1]]['step'], L['self'].state[list(L['self'].state.keys())[2]]['step'], L['self'].state[list(L['self'].state.keys())[3]]['step'], L['self'].state[list(L['self'].state.keys())[4]]['step'], 
L['self'].state[list(L['self'].state.keys())[5]]['step'], L['self'].state[list(L['self'].state.keys())[6]]['step'], L['self'].state[list(L['self'].state.keys())[7]]['step'], L['self'].state[list(L['self'].state.keys())[8]]['step'], L['self'].state[list(L['self'].state.keys())[9]]['step'], L['self'].state[list(L['self'].state.keys())[10]]['step'], L['self'].state[list(L['self'].state.keys())[11]]['step'], L['self'].state[list(L['self'].state.keys())[12]]['step'], L['self'].state[list(L['self'].state.keys())[13]]['step'], L['self'].state[list(L['self'].state.keys())[14]]['step'], L['self'].state[list(L['self'].state.keys())[15]]['step'], L['self'].state[list(L['self'].state.keys())[16]]['step'], L['self'].state[list(L['self'].state.keys())[17]]['step'], L['self'].state[list(L['self'].state.keys())[18]]['step'], L['self'].state[list(L['self'].state.keys())[19]]['step'], L['self'].state[list(L['self'].state.keys())[20]]['step'], L['self'].state[list(L['self'].state.keys())[21]]['step'], L['self'].state[list(L['self'].state.keys())[22]]['step'], L['self'].state[list(L['self'].state.keys())[23]]['step'], L['self'].state[list(L['self'].state.keys())[24]]['step'], L['self'].state[list(L['self'].state.keys())[25]]['step'], L['self'].state[list(L['self'].state.keys())[26]]['step'], L['self'].state[list(L['self'].state.keys())[27]]['step'], L['self'].state[list(L['self'].state.keys())[28]]['step'], L['self'].state[list(L['self'].state.keys())[29]]['step'], L['self'].state[list(L['self'].state.keys())[30]]['step'], L['self'].state[list(L['self'].state.keys())[31]]['step'], L['self'].state[list(L['self'].state.keys())[32]]['step'], L['self'].state[list(L['self'].state.keys())[33]]['step'], L['self'].state[list(L['self'].state.keys())[34]]['step'], L['self'].state[list(L['self'].state.keys())[35]]['step'], L['self'].state[list(L['self'].state.keys())[36]]['step'], L['self'].state[list(L['self'].state.keys())[37]]['step'], L['self'].state[list(L['self'].state.keys())[38]]['step'], L['self'].state[list(L['self'].state.keys())[39]]['step'], L['self'].state[list(L['self'].state.keys())[40]]['step'], L['self'].state[list(L['self'].state.keys())[41]]['step'], L['self'].state[list(L['self'].state.keys())[42]]['step'], L['self'].state[list(L['self'].state.keys())[43]]['step'], L['self'].state[list(L['self'].state.keys())[44]]['step'], L['self'].state[list(L['self'].state.keys())[45]]['step'], L['self'].state[list(L['self'].state.keys())[46]]['step'], L['self'].state[list(L['self'].state.keys())[47]]['step'], L['self'].state[list(L['self'].state.keys())[48]]['step'], L['self'].state[list(L['self'].state.keys())[49]]['step'], L['self'].state[list(L['self'].state.keys())[50]]['step'], L['self'].state[list(L['self'].state.keys())[51]]['step'], L['self'].state[list(L['self'].state.keys())[52]]['step'], L['self'].state[list(L['self'].state.keys())[53]]['step'], L['self'].state[list(L['self'].state.keys())[54]]['step'], L['self'].state[list(L['self'].state.keys())[55]]['step'], L['self'].state[list(L['self'].state.keys())[56]]['step'], L['self'].state[list(L['self'].state.keys())[57]]['step'], L['self'].state[list(L['self'].state.keys())[58]]['step'], L['self'].state[list(L['self'].state.keys())[59]]['step'], L['self'].state[list(L['self'].state.keys())[60]]['step'], L['self'].state[list(L['self'].state.keys())[61]]['step'], L['self'].state[list(L['self'].state.keys())[62]]['step'], L['self'].state[list(L['self'].state.keys())[63]]['step'], L['self'].state[list(L['self'].state.keys())[64]]['step'], 
L['self'].state[list(L['self'].state.keys())[65]]['step'], L['self'].state[list(L['self'].state.keys())[66]]['step'], L['self'].state[list(L['self'].state.keys())[67]]['step'], L['self'].state[list(L['self'].state.keys())[68]]['step'], L['self'].state[list(L['self'].state.keys())[69]]['step'], L['self'].state[list(L['self'].state.keys())[70]]['step'], L['self'].state[list(L['self'].state.keys())[71]]['step'], L['self'].state[list(L['self'].state.keys())[72]]['step'], L['self'].state[list(L['self'].state.keys())[73]]['step'], L['self'].state[list(L['self'].state.keys())[74]]['step'], L['self'].state[list(L['self'].state.keys())[75]]['step'], L['self'].state[list(L['self'].state.keys())[76]]['step'], L['self'].state[list(L['self'].state.keys())[77]]['step'], L['self'].state[list(L['self'].state.keys())[78]]['step'], L['self'].state[list(L['self'].state.keys())[79]]['step'], L['self'].state[list(L['self'].state.keys())[80]]['step'], L['self'].state[list(L['self'].state.keys())[81]]['step'], L['self'].state[list(L['self'].state.keys())[82]]['step'], L['self'].state[list(L['self'].state.keys())[83]]['step'], L['self'].state[list(L['self'].state.keys())[84]]['step'], L['self'].state[list(L['self'].state.keys())[85]]['step'], L['self'].state[list(L['self'].state.keys())[86]]['step'], L['self'].state[list(L['self'].state.keys())[87]]['step'], L['self'].state[list(L['self'].state.keys())[88]]['step'], L['self'].state[list(L['self'].state.keys())[89]]['step'], L['self'].state[list(L['self'].state.keys())[90]]['step'], L['self'].state[list(L['self'].state.keys())[91]]['step'], L['self'].state[list(L['self'].state.keys())[92]]['step'], L['self'].state[list(L['self'].state.keys())[93]]['step'], L['self'].state[list(L['self'].state.keys())[94]]['step'], L['self'].state[list(L['self'].state.keys())[95]]['step'], L['self'].state[list(L['self'].state.keys())[96]]['step'], L['self'].state[list(L['self'].state.keys())[97]]['step'], L['self'].state[list(L['self'].state.keys())[98]]['step'], L['self'].state[list(L['self'].state.keys())[99]]['step'], L['self'].state[list(L['self'].state.keys())[100]]['step'], L['self'].state[list(L['self'].state.keys())[101]]['step'], L['self'].state[list(L['self'].state.keys())[102]]['step'], L['self'].state[list(L['self'].state.keys())[103]]['step'], L['self'].state[list(L['self'].state.keys())[104]]['step'], L['self'].state[list(L['self'].state.keys())[105]]['step'], L['self'].state[list(L['self'].state.keys())[106]]['step'], L['self'].state[list(L['self'].state.keys())[107]]['step'], L['self'].state[list(L['self'].state.keys())[108]]['step'], L['self'].state[list(L['self'].state.keys())[109]]['step'], L['self'].state[list(L['self'].state.keys())[110]]['step'], L['self'].state[list(L['self'].state.keys())[111]]['step'], L['self'].state[list(L['self'].state.keys())[112]]['step'], L['self'].state[list(L['self'].state.keys())[113]]['step'], L['self'].state[list(L['self'].state.keys())[114]]['step'], L['self'].state[list(L['self'].state.keys())[115]]['step'], L['self'].state[list(L['self'].state.keys())[116]]['step'], L['self'].state[list(L['self'].state.keys())[117]]['step'], L['self'].state[list(L['self'].state.keys())[118]]['step'], L['self'].state[list(L['self'].state.keys())[119]]['step'], L['self'].state[list(L['self'].state.keys())[120]]['step'], L['self'].state[list(L['self'].state.keys())[121]]['step'], L['self'].state[list(L['self'].state.keys())[122]]['step'], L['self'].state[list(L['self'].state.keys())[123]]['step'], 
L['self'].state[list(L['self'].state.keys())[124]]['step'], L['self'].state[list(L['self'].state.keys())[125]]['step'], L['self'].state[list(L['self'].state.keys())[126]]['step'], L['self'].state[list(L['self'].state.keys())[127]]['step'], L['self'].state[list(L['self'].state.keys())[128]]['step'], L['self'].state[list(L['self'].state.keys())[129]]['step'], L['self'].state[list(L['self'].state.keys())[130]]['step'], L['self'].state[list(L['self'].state.keys())[131]]['step'], L['self'].state[list(L['self'].state.keys())[132]]['step'], L['self'].state[list(L['self'].state.keys())[133]]['step'], L['self'].state[list(L['self'].state.keys())[134]]['step'], L['self'].state[list(L['self'].state.keys())[135]]['step'], L['self'].state[list(L['self'].state.keys())[136]]['step'], L['self'].state[list(L['self'].state.keys())[137]]['step'], L['self'].state[list(L['self'].state.keys())[138]]['step'], L['self'].state[list(L['self'].state.keys())[139]]['step'], L['self'].state[list(L['self'].state.keys())[140]]['step'], L['self'].state[list(L['self'].state.keys())[141]]['step'], L['self'].state[list(L['self'].state.keys())[142]]['step'], L['self'].state[list(L['self'].state.keys())[143]]['step'], L['self'].state[list(L['self'].state.keys())[144]]['step'], L['self'].state[list(L['self'].state.keys())[145]]['step'], L['self'].state[list(L['self'].state.keys())[146]]['step'], L['self'].state[list(L['self'].state.keys())[147]]['step'], L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], 
L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], 
L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], 
L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq']) + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], 140561608014592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + 
| | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], 140561608013952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[0]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=1 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]] + | | | | | +- DICT_LENGTH: len(L['self'].state[list(L['self'].state.keys())[1]]) == 3 + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['step'], 140561608013792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], 140561608013392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], 140561608013632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[1]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=2 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['step'], 140561608013712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[2]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], 140561608013472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], 140561608013072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[2]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=3 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['step'], 140561608012992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], 140561608013312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], 140561608013232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | 
+- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[3]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=4 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['step'], 140561608012832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], 140561608012432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], 140561608012672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[4]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=5 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['step'], 140561608012752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], 140561608012512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], 140561608012112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[5]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=6 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['step'], 140561608012032) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], 140561608012352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], 140561608012272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[6]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=7 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['step'], 140561608011872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], 140561608011472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], 140561608011712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[7]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=8 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['step'], 140561608011792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], 140561608011552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], 140561608010672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[8]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=9 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['step'], 140561608010032) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], 140561608011392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], 140561608011312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[9]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=10 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['step'], 140561608004752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], 140561608005312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], 140561608004512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[10]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=11 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['step'], 140561608004912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], 140561608005232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], 140561608005392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[11]]['exp_avg_sq'], 
'_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=12 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['step'], 140561608005712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], 140561608004592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], 140561608005072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[12]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=13 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['step'], 140561608005872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], 140561608006272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, 
AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], 140561608005632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[13]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=14 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['step'], 140561608004992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], 140561608005952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], 140561608006192) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[14]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=15 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['step'], 
140561608006592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], 140561608006352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], 140561608006432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[15]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=16 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['step'], 140561608006832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], 140561608007152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], 140561608006992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[16]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=17 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['step'], 140561608006912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], 140561608006752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], 140561608007552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[17]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=18 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['step'], 140561608006672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], 140561608007072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], 140561608007392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[18]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=19 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['step'], 140561608007712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[19]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], 140561608007232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], 140561608007952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[19]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=20 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['step'], 140561608007632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], 140561608008112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], 140561608008512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[20]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=21 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['step'], 140561608008592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], 140561608007872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], 140561608008272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[21]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=22 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['step'], 140561608007792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], 140561608009152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], 140561608008832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[22]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=23 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['step'], 140561608008432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], 140561608009072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], 140561608009392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[23]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=24 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['step'], 140561608009312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], 140561608009232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], 
accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], 140561608008352) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[24]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=25 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['step'], 140561608009712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], 140561608009872) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], 140561608008912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[25]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=26 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['step'], 140561608009792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[26]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], 140561608009952) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], 140561608010112) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[26]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=27 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['step'], 140561608010432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], 140561608010192) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], 140561608010272) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], 
stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[27]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=28 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['step'], 140561608010592) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], 140561608011072) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], 140561608010752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[28]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=29 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['step'], 140561608004672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], 140561608010912) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], 140561608009472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[29]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=30 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['step'], 140562023536560) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], 140561757337888) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], 140561654532512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[30]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=31 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[31]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['step'], 140561618656672) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], 140561654098144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], 140561654096624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[31]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=32 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['step'], 140561654099184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], 140561654098384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], 140561654097904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[32]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=33 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['step'], 140561654097424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], 140561654098304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], 140561608484688) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[33]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=34 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['step'], 140561608484368) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], 140561608484608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], 140561608483488) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[34]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=35 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['step'], 140561608483888) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], 140561608484528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], 140561608483808) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[35]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=36 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['step'], 140561608483328) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], 140561608483648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], 140561608483168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[36]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=37 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['step'], 140561608482848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], 140561608482528) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], 140561608481968) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[37]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=38 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['step'], 140561608482288) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], 140561608482208) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], 140561608482128) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[38]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=39 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['step'], 
accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['step'], 140561608481648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], 140561608481568) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], 140561608481408) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[39]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=40 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['step'], 140561608481168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], 140561608480768) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], 140561608480208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[40]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=41
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['step'], 140561608480528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], 140561608480448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], 140561608480368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[41]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=42
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['step'], 140561608479328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], 140561608479888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], 140561608479728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[42]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=43
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['step'], 140561608479488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], 140561608479088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], 140561608478528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[43]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=44
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['step'], 140561608478848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], 140561608478768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], 140561608478688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[44]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=45
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['step'], 140561608477648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], 140561608478208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], 140561608478048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[45]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=46
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['step'], 140561608477808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], 140561608477408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], 140561608476848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[46]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=47
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['step'], 140561608477168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], 140561608477088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], 140561608477008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[47]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=48
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['step'], 140561608475968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], 140561608476528)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], 140561608476368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[48]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=49
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['step'], 140561608476128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], 140561608475808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], 140561608484768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[49]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=50
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['step'], 140561608484128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], 140561608483408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], 140561608482608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[50]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=51
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['step'], 140561608480848)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], 140561608480128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], 140561608479168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[51]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=52
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['step'], 140561608477488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], 140561608476768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], 140561608475728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[52]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=53
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['step'], 140561608484448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], 140561608483968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], 140561608483568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[53]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=54
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['step'], 140561608480928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], 140561608482928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], 140561608482448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[54]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=55
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['step'], 140561608481728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], 140561608481008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], 140561608481488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[55]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=56
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['step'], 140561608481088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], 140561608480688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], 140561608480288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[56]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=57
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['step'], 140561608479248)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], 140561608479808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], 140561608479568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[57]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=58
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['step'], 140561608479008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], 140561608478608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], 140561608478288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[58]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=59
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['step'], 140561608478128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], 140561608477888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], 140561608477728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[59]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=60
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['step'], 140561608476928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], 140561608476608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], 140561608475888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[60]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=61
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['step'], 140561608476208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], 140561608476048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], 140561608485008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[61]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=62
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['step'], 140561608485088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], 140561608485168)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], 140561608484928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[62]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=63
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['step'], 140561608485328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], 140561608485408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], 140561608485488)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[63]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=64
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['step'], 140561608485648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], 140561608485728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], 140561608485808)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[64]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=65
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['step'], 140561608485968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], 140561608486048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], 140561608486128)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[65]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=66
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['step'], 140561608486288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], 140561608486368)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], 140561608486448)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[66]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=67
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['step'], 140561608486608)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], 140561608486688)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], 140561608486768)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[67]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=68
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['step'], 140561608486928)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], 140561608487008)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], 140561608487088)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[68]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=69
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['step'], 140561608487248)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], 140561608487328)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], 140561608487408)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[69]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=70
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['step'], 140561608487568)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], 140561608487648)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], 140561608487728)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[70]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=71
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['step'], 140561608487888)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], 140561608487968)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], 140561608488048)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[71]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | +- KeyValueManager pair at index=72
+ | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['step'], accessed_by=DictGetItemGuardAccessor(step)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['step'], 140561608488208)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['step'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg)
+ | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], 140561608488288)
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1])
+ | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg'], '_dynamo_dynamic_indices') == False
+ | | | | | | +- NO_TENSOR_ALIASING
+ | |
| | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], 140561608488368) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[72]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=73 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['step'], 140561608488528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], 140561608488608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], 140561608488688) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[73]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=74 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['step'], 140561608488848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], 
stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], 140561608488928) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], 140561608489008) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[74]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=75 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['step'], 140561608489168) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], 140561608489248) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], 140561608489328) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[75]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=76 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['step'], 140561608489488) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], 140561608489568) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], 140561608489648) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[76]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=77 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['step'], 140561608489808) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], 140561608489888) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], 140561608489968) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[77]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=78 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['step'], 140561608490128) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], 140561608490208) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], 140561608490288) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[78]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=79 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[79]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['step'], 140561608490448) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], 140561608490528) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], 140561608490608) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[79]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=80 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['step'], 140561608490768) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], 140561608490848) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], 140561608490928) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[80]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=81 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['step'], 140561608261776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], 140561608261856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], 140561608261936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[81]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=82 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['step'], 140561608262096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], 140561608262176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], 140561608262256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[82]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=83 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['step'], 140561608262416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], 140561608262496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], 140561608262576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[83]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=84 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['step'], 140561608262736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], 140561608262816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], 140561608262896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[84]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=85 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['step'], 140561608263056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], 140561608263136) + | | | | | | +- 
TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], 140561608263216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[85]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=86 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['step'], 140561608263376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], 140561608263456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], 140561608263536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[86]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=87 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['step'], 
accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['step'], 140561608263696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], 140561608263776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], 140561608263856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[87]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=88 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['step'], 140561608264016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], 140561608264096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], 140561608264176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[88]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=89 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['step'], 140561608264336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], 140561608264416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], 140561608264496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[89]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=90 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['step'], 140561608264656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- 
NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], 140561608264736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], 140561608264816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[90]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=91 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['step'], 140561608264976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], 140561608265056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], 140561608265136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[91]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=92 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['step'], 140561608265296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], 140561608265376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], 140561608265456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[92]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=93 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['step'], 140561608265616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], 140561608265696) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], 140561608265776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[93]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=94 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['step'], 140561608265936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], 140561608266016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], 140561608266096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[94]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=95 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[95]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['step'], 140561608266256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], 140561608266336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], 140561608266416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[95]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=96 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['step'], 140561608266576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], 140561608266656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | 
| | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], 140561608266736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[96]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=97 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['step'], 140561608266896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], 140561608266976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], 140561608267056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[97]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=98 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['step'], 140561608267216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], 
stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], 140561608267296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], 140561608267376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[98]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=99 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['step'], 140561608267536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], 140561608267616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], 140561608267696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[99]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=100 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['step'], 140561608267856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], 140561608267936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], 140561608268016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[100]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=101 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['step'], 140561608268176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], 
140561608268256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], 140561608268336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[101]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=102 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['step'], 140561608268496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], 140561608268576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], 140561608268656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[102]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=103 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]] + | | | | | +- 
GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['step'], 140561608268816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], 140561608268896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], 140561608268976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[103]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=104 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['step'], 140561608269136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], 140561608269216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], 140561608269296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[104]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=105 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['step'], 140561608269456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], 140561608269536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], 140561608269616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[105]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=106 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['step'], 140561608269776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], 140561608269856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], 140561608269936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[106]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=107 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['step'], 140561608270096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], 140561608270176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], 140561608270256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[107]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=108 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['step'], 140561608270416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], 140561608270496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], 140561608270576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[108]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=109 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['step'], 140561608270736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], 140561608270816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], 140561608270896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[109]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=110 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['step'], 140561608271056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], 140561608271136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], 140561608271216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[110]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=111 + | | | | +- ValueManager: GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[111]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['step'], 140561608271376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], 140561608271456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], 140561608271536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[111]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=112 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['step'], 140561608271696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[112]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], 140561608271776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], 140561608271856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[112]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=113 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['step'], 140561608272016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], 140561608272096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], 140561608272176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[113]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=114 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['step'], 140561608272336) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], 140561608272416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], 140561608272496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[114]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=115 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['step'], 140561608272656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], 140561608272736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], 140561608272816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[115]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=116 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['step'], 140561608272976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], 140561608273056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], 140561608273136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[116]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=117 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['step'], 140561608273296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], 140561608273376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], 140561608273456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[117]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=118 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['step'], 140561608273616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[118]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], 140561608273696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], 140561608273776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[118]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=119 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['step'], 140561608273936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], 140561608274016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], 140561608274096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[119]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=120 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['step'], 140561608274256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], 140561608274336) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], 140561608274416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[120]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=121 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['step'], 140561608274576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], 140561608274656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], 140561608274736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[121]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=122 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[122]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['step'], 140561608274896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], 140561608274976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], 140561608275056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[122]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=123 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['step'], 140561608275216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], 140561608275296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], 140561608275376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[123]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=124 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['step'], 140561608275536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], 140561608275616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], 140561608275696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[124]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=125 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['step'], 140561608275856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), 
torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], 140561608275936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], 140561608276016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[125]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=126 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['step'], 140561608276176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], 140561608276256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], 140561608276336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], Tensor, 
DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[126]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=127 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['step'], 140561608276496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], 140561608276576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], 140561608276656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[127]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=128 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['step'], 140561608276816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], 140561608276896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], 140561608276976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[128]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=129 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['step'], 140561608277136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], 140561608277216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], 140561608277296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[129]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=130 + | | | | +- ValueManager: GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[130]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['step'], 140561608277456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], 140561608277536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], 140561608277616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[130]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=131 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['step'], 140561608277776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[131]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], 140561608277856) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], 140561608277936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[131]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=132 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['step'], 140561608507536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], 140561608507616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], 140561608507696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[132]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=133 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['step'], 140561608507856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], 140561608507936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], 140561608508016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[133]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=134 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['step'], 140561608508176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], 140561608508256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], 140561608508336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[134]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=135 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['step'], 140561608508496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], 140561608508576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], 140561608508656) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[135]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=136 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['step'], 140561608508816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], 140561608508896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], 140561608508976) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[136]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=137 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['step'], 140561608509136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[137]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], 140561608509216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], 140561608509296) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[2304], stride=[1]) + | | | | | | +- NO_HASATTR: 
hasattr(L['self'].state[list(L['self'].state.keys())[137]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=138 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['step'], 140561608509456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], 140561608509536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], 140561608509616) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[138]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=139 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['step'], 140561608509776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], 140561608509856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], 140561608509936) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[139]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=140 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['step'], 140561608510096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], 140561608510176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], 140561608510256) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[140]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=141 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]] + | | | | | +- GuardManager: 
source=L['self'].state[list(L['self'].state.keys())[141]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['step'], 140561608510416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], 140561608510496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], 140561608510576) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[141]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=142 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['step'], 140561608510736) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], 140561608510816) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], 140561608510896) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[142]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=143 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['step'], 140561608511056) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], 140561608511136) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], 140561608511216) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[3072], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[143]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=144 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['step'], 140561608511376) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), 
torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], 140561608511456) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], 140561608511536) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[144]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=145 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['step'], 140561608511696) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], 140561608511776) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], 140561608511856) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[145]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=146 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['step'], 140561608512016) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], 140561608512096) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], 140561608512176) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[146]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- KeyValueManager pair at index=147 + | | | | +- ValueManager: GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]] + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['step'], accessed_by=DictGetItemGuardAccessor(step) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['step'], 140561608512336) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['step'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[], stride=[]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['step'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], 
accessed_by=DictGetItemGuardAccessor(exp_avg) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], 140561608512416) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | | | +- GuardManager: source=L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], accessed_by=DictGetItemGuardAccessor(exp_avg_sq) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], 140561608512496) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[768], stride=[1]) + | | | | | | +- NO_HASATTR: hasattr(L['self'].state[list(L['self'].state.keys())[147]]['exp_avg_sq'], '_dynamo_dynamic_indices') == False + | | | | | | +- NO_TENSOR_ALIASING + | | | +- GuardManager: source=L['self'].state.default_factory, accessed_by=GetAttrGuardAccessor(default_factory) + | | | | +- ID_MATCH: ___check_obj_id(L['self'].state.default_factory, 94206128762464) + | | +- GuardManager: source=L['self'].param_groups, accessed_by=GetAttrGuardAccessor(param_groups) + | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups, 94206128766016) + | | | +- LENGTH_CHECK: len(L['self'].param_groups) == 1 + | | | +- GuardManager: source=L['self'].param_groups[0], accessed_by=ListGetItemGuardAccessor(0) + | | | | +- DICT_LENGTH: len(L['self'].param_groups[0]) == 11 + | | | | +- GuardManager: source=L['self'].param_groups[0]['params'], accessed_by=DictGetItemGuardAccessor(params) + | | | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups[0]['params'], 94206128766016) + | | | | | +- LENGTH_CHECK: len(L['self'].param_groups[0]['params']) == 148 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][0], accessed_by=ListGetItemGuardAccessor(0) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][0], 140561606584704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][0], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[50304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][0].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][0].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][1], accessed_by=ListGetItemGuardAccessor(1) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][1], 140561606574304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][1], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1024, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][1].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: 
L['self'].param_groups[0]['params'][1].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][2], accessed_by=ListGetItemGuardAccessor(2) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][2], 140561606584384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][2], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][2].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][2].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][3], accessed_by=ListGetItemGuardAccessor(3) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][3], 140561606584544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][3], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][3].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][3].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][4], accessed_by=ListGetItemGuardAccessor(4) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][4], 140561606583584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][4], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][4].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][4].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][5], accessed_by=ListGetItemGuardAccessor(5) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][5], 140561606583504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][5], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][5].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][5].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][6], accessed_by=ListGetItemGuardAccessor(6) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][6], 140561606583824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][6], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][6].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][6].grad is not None + | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][7], accessed_by=ListGetItemGuardAccessor(7) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][7], 140561606583104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][7], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][7].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][7].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][8], accessed_by=ListGetItemGuardAccessor(8) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][8], 140561606583904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][8], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][8].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][8].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][9], accessed_by=ListGetItemGuardAccessor(9) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][9], 140561606584464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][9], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][9].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][9].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][10], accessed_by=ListGetItemGuardAccessor(10) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][10], 140561606582224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][10], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][10].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][10].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][11], accessed_by=ListGetItemGuardAccessor(11) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][11], 140561606582544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][11], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][11].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][11].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][12], accessed_by=ListGetItemGuardAccessor(12) + 
| | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][12], 140561606583024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][12], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][12].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][12].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][13], accessed_by=ListGetItemGuardAccessor(13) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][13], 140561606581584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][13], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][13].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][13].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][14], accessed_by=ListGetItemGuardAccessor(14) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][14], 140561606582704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][14], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][14].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][14].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][15], accessed_by=ListGetItemGuardAccessor(15) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][15], 140561606583664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][15], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][15].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][15].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][16], accessed_by=ListGetItemGuardAccessor(16) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][16], 140561606581424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][16], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][16].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][16].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][17], accessed_by=ListGetItemGuardAccessor(17) + | | | | | | +- ID_MATCH: 
___check_obj_id(L['self'].param_groups[0]['params'][17], 140561606581744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][17], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][17].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][17].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][18], accessed_by=ListGetItemGuardAccessor(18) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][18], 140561606582064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][18], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][18].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][18].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][19], accessed_by=ListGetItemGuardAccessor(19) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][19], 140561606580944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][19], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][19].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][19].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][20], accessed_by=ListGetItemGuardAccessor(20) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][20], 140561606582144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][20], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][20].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][20].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][21], accessed_by=ListGetItemGuardAccessor(21) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][21], 140561606583184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][21], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][21].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][21].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][22], accessed_by=ListGetItemGuardAccessor(22) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][22], 140561606580464) 
+ | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][22], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][22].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][22].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][23], accessed_by=ListGetItemGuardAccessor(23) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][23], 140561606581104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][23], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][23].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][23].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][24], accessed_by=ListGetItemGuardAccessor(24) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][24], 140561606580864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][24], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][24].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][24].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][25], accessed_by=ListGetItemGuardAccessor(25) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][25], 140561606580144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][25], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][25].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][25].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][26], accessed_by=ListGetItemGuardAccessor(26) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][26], 140561606581184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][26], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][26].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][26].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][27], accessed_by=ListGetItemGuardAccessor(27) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][27], 140561606581984) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].param_groups[0]['params'][27], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][27].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][27].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][28], accessed_by=ListGetItemGuardAccessor(28) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][28], 140561606579984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][28], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][28].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][28].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][29], accessed_by=ListGetItemGuardAccessor(29) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][29], 140561606580224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][29], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][29].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][29].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][30], accessed_by=ListGetItemGuardAccessor(30) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][30], 140561606579904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][30], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][30].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][30].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][31], accessed_by=ListGetItemGuardAccessor(31) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][31], 140561606579104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][31], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][31].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][31].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][32], accessed_by=ListGetItemGuardAccessor(32) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][32], 140561606580384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][32], 
Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][32].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][32].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][33], accessed_by=ListGetItemGuardAccessor(33) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][33], 140561606581344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][33], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][33].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][33].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][34], accessed_by=ListGetItemGuardAccessor(34) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][34], 140561606579024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][34], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][34].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][34].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][35], accessed_by=ListGetItemGuardAccessor(35) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][35], 140561606578944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][35], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][35].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][35].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][36], accessed_by=ListGetItemGuardAccessor(36) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][36], 140561606579584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][36], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][36].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][36].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][37], accessed_by=ListGetItemGuardAccessor(37) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][37], 140561606577984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][37], Parameter, DispatchKeySet(CUDA, BackendSelect, 
ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][37].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][37].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][38], accessed_by=ListGetItemGuardAccessor(38) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][38], 140561606579424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][38], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][38].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][38].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][39], accessed_by=ListGetItemGuardAccessor(39) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][39], 140561606580304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][39], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][39].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][39].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][40], accessed_by=ListGetItemGuardAccessor(40) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][40], 140561606577424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][40], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][40].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][40].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][41], accessed_by=ListGetItemGuardAccessor(41) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][41], 140561606578144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][41], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][41].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][41].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][42], accessed_by=ListGetItemGuardAccessor(42) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][42], 140561606578464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][42], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, 
requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][42].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][42].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][43], accessed_by=ListGetItemGuardAccessor(43) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][43], 140561606577904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][43], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][43].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][43].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][44], accessed_by=ListGetItemGuardAccessor(44) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][44], 140561606578624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][44], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][44].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][44].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][45], accessed_by=ListGetItemGuardAccessor(45) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][45], 140561606578784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][45], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][45].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][45].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][46], accessed_by=ListGetItemGuardAccessor(46) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][46], 140561606576704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][46], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][46].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][46].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][47], accessed_by=ListGetItemGuardAccessor(47) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][47], 140561606577184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][47], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][47].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][47].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][48], accessed_by=ListGetItemGuardAccessor(48) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][48], 140561606577824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][48], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][48].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][48].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][49], accessed_by=ListGetItemGuardAccessor(49) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][49], 140561606576944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][49], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][49].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][49].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][50], accessed_by=ListGetItemGuardAccessor(50) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][50], 140561606577664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][50], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][50].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][50].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][51], accessed_by=ListGetItemGuardAccessor(51) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][51], 140561606578544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][51], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][51].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][51].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][52], accessed_by=ListGetItemGuardAccessor(52) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][52], 140561606575664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][52], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][52].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][52].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][53], accessed_by=ListGetItemGuardAccessor(53) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][53], 140561606576464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][53], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][53].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][53].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][54], accessed_by=ListGetItemGuardAccessor(54) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][54], 140561606576864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][54], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][54].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][54].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][55], accessed_by=ListGetItemGuardAccessor(55) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][55], 140561606576384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][55], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][55].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][55].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][56], accessed_by=ListGetItemGuardAccessor(56) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][56], 140561606577104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][56], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][56].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][56].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][57], accessed_by=ListGetItemGuardAccessor(57) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][57], 140561606577744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][57], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][57].grad, 
accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][57].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][58], accessed_by=ListGetItemGuardAccessor(58) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][58], 140561606574864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][58], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][58].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][58].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][59], accessed_by=ListGetItemGuardAccessor(59) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][59], 140561606575504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][59], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][59].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][59].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][60], accessed_by=ListGetItemGuardAccessor(60) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][60], 140561606575984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][60], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][60].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][60].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][61], accessed_by=ListGetItemGuardAccessor(61) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][61], 140561606575424) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][61], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][61].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][61].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][62], accessed_by=ListGetItemGuardAccessor(62) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][62], 140561606575824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][62], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][62].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- 
NOT_NONE: L['self'].param_groups[0]['params'][62].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][63], accessed_by=ListGetItemGuardAccessor(63) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][63], 140561606576784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][63], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][63].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][63].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][64], accessed_by=ListGetItemGuardAccessor(64) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][64], 140561606574544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][64], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][64].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][64].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][65], accessed_by=ListGetItemGuardAccessor(65) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][65], 140561606574704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][65], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][65].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][65].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][66], accessed_by=ListGetItemGuardAccessor(66) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][66], 140561606575024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][66], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][66].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][66].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][67], accessed_by=ListGetItemGuardAccessor(67) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][67], 140561606585664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][67], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][67].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][67].grad is not 
None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][68], accessed_by=ListGetItemGuardAccessor(68) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][68], 140561606575264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][68], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][68].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][68].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][69], accessed_by=ListGetItemGuardAccessor(69) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][69], 140561606576304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][69], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][69].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][69].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][70], accessed_by=ListGetItemGuardAccessor(70) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][70], 140561606581664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][70], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][70].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][70].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][71], accessed_by=ListGetItemGuardAccessor(71) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][71], 140561606582304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][71], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][71].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][71].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][72], accessed_by=ListGetItemGuardAccessor(72) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][72], 140561606584064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][72], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][72].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][72].grad is not None + | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][73], accessed_by=ListGetItemGuardAccessor(73) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][73], 140561606580704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][73], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][73].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][73].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][74], accessed_by=ListGetItemGuardAccessor(74) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][74], 140561606583264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][74], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][74].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][74].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][75], accessed_by=ListGetItemGuardAccessor(75) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][75], 140561606575344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][75], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][75].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][75].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][76], accessed_by=ListGetItemGuardAccessor(76) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][76], 140561606576544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][76], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][76].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][76].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][77], accessed_by=ListGetItemGuardAccessor(77) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][77], 140561606577344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][77], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][77].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][77].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][78], 
accessed_by=ListGetItemGuardAccessor(78) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][78], 140561606578304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][78], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][78].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][78].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][79], accessed_by=ListGetItemGuardAccessor(79) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][79], 140561606575584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][79], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][79].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][79].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][80], accessed_by=ListGetItemGuardAccessor(80) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][80], 140561606579824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][80], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][80].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][80].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][81], accessed_by=ListGetItemGuardAccessor(81) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][81], 140561606585024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][81], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][81].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][81].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][82], accessed_by=ListGetItemGuardAccessor(82) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][82], 140561606584944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][82], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][82].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][82].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][83], accessed_by=ListGetItemGuardAccessor(83) + | | | | | | +- 
ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][83], 140561606585104) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][83], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][83].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][83].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][84], accessed_by=ListGetItemGuardAccessor(84) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][84], 140561606583744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][84], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][84].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][84].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][85], accessed_by=ListGetItemGuardAccessor(85) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][85], 140561606582464) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][85], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][85].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][85].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][86], accessed_by=ListGetItemGuardAccessor(86) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][86], 140561606585584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][86], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][86].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][86].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][87], accessed_by=ListGetItemGuardAccessor(87) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][87], 140561606578864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][87], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][87].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][87].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][88], accessed_by=ListGetItemGuardAccessor(88) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][88], 
140561606583344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][88], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][88].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][88].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][89], accessed_by=ListGetItemGuardAccessor(89) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][89], 140561606583984) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][89], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][89].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][89].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][90], accessed_by=ListGetItemGuardAccessor(90) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][90], 140561606584224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][90], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][90].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][90].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][91], accessed_by=ListGetItemGuardAccessor(91) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][91], 140561606582784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][91], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][91].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][91].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][92], accessed_by=ListGetItemGuardAccessor(92) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][92], 140561606584784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][92], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][92].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][92].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][93], accessed_by=ListGetItemGuardAccessor(93) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][93], 140561606574944) + | | | | | | +- TENSOR_MATCH: 
check_tensor(L['self'].param_groups[0]['params'][93], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][93].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][93].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][94], accessed_by=ListGetItemGuardAccessor(94) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][94], 140561606581824) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][94], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][94].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][94].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][95], accessed_by=ListGetItemGuardAccessor(95) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][95], 140561606582624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][95], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][95].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][95].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][96], accessed_by=ListGetItemGuardAccessor(96) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][96], 140561606582944) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][96], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][96].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][96].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][97], accessed_by=ListGetItemGuardAccessor(97) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][97], 140561606581904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][97], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][97].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][97].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][98], accessed_by=ListGetItemGuardAccessor(98) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][98], 140561606582864) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][98], 
Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][98].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][98].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][99], accessed_by=ListGetItemGuardAccessor(99) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][99], 140561606584304) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][99], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][99].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][99].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][100], accessed_by=ListGetItemGuardAccessor(100) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][100], 140561606580784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][100], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][100].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][100].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][101], accessed_by=ListGetItemGuardAccessor(101) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][101], 140561606581024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][101], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][101].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][101].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][102], accessed_by=ListGetItemGuardAccessor(102) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][102], 140561606581264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][102], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][102].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][102].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][103], accessed_by=ListGetItemGuardAccessor(103) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][103], 140561606580544) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][103], Parameter, DispatchKeySet(CUDA, 
BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][103].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][103].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][104], accessed_by=ListGetItemGuardAccessor(104) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][104], 140561606581504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][104], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][104].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][104].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][105], accessed_by=ListGetItemGuardAccessor(105) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][105], 140561606580624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][105], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][105].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][105].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][106], accessed_by=ListGetItemGuardAccessor(106) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][106], 140561606579344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][106], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][106].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][106].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][107], accessed_by=ListGetItemGuardAccessor(107) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][107], 140561606579504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][107], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][107].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][107].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][108], accessed_by=ListGetItemGuardAccessor(108) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][108], 140561606579664) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][108], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, 
AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][108].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][108].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][109], accessed_by=ListGetItemGuardAccessor(109) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][109], 140561606579264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][109], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][109].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][109].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][110], accessed_by=ListGetItemGuardAccessor(110) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][110], 140561606577024) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][110], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][110].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][110].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][111], accessed_by=ListGetItemGuardAccessor(111) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][111], 140561606579184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][111], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][111].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][111].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][112], accessed_by=ListGetItemGuardAccessor(112) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][112], 140561606578064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][112], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][112].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][112].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][113], accessed_by=ListGetItemGuardAccessor(113) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][113], 140561606575744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][113], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, 
device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][113].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][113].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][114], accessed_by=ListGetItemGuardAccessor(114) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][114], 140561606578224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][114], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][114].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][114].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][115], accessed_by=ListGetItemGuardAccessor(115) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][115], 140561606577584) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][115], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][115].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][115].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][116], accessed_by=ListGetItemGuardAccessor(116) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][116], 140561606578704) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][116], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][116].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][116].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][117], accessed_by=ListGetItemGuardAccessor(117) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][117], 140561606580064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][117], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][117].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][117].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][118], accessed_by=ListGetItemGuardAccessor(118) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][118], 140561606585744) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][118], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 
768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][118].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][118].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][119], accessed_by=ListGetItemGuardAccessor(119) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][119], 140561606576064) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][119], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][119].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][119].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][120], accessed_by=ListGetItemGuardAccessor(120) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][120], 140561606577264) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][120], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][120].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][120].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][121], accessed_by=ListGetItemGuardAccessor(121) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][121], 140561606576224) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][121], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][121].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][121].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][122], accessed_by=ListGetItemGuardAccessor(122) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][122], 140561606576624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][122], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][122].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][122].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][123], accessed_by=ListGetItemGuardAccessor(123) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][123], 140561606578384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][123], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- 
NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][123].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][123].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][124], accessed_by=ListGetItemGuardAccessor(124) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][124], 140561606574784) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][124], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][124].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][124].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][125], accessed_by=ListGetItemGuardAccessor(125) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][125], 140561606575184) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][125], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][125].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][125].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][126], accessed_by=ListGetItemGuardAccessor(126) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][126], 140565185705792) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][126], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][126].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][126].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][127], accessed_by=ListGetItemGuardAccessor(127) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][127], 140561606574624) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][127], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][127].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][127].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][128], accessed_by=ListGetItemGuardAccessor(128) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][128], 140561606576144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][128], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | 
+- GuardManager: source=L['self'].param_groups[0]['params'][128].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][128].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][129], accessed_by=ListGetItemGuardAccessor(129) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][129], 140561606577504) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][129], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][129].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][129].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][130], accessed_by=ListGetItemGuardAccessor(130) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][130], 140561608015712) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][130], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][130].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][130].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][131], accessed_by=ListGetItemGuardAccessor(131) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][131], 140561608015392) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][131], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][131].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][131].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][132], accessed_by=ListGetItemGuardAccessor(132) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][132], 140561608015312) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][132], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][132].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][132].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][133], accessed_by=ListGetItemGuardAccessor(133) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][133], 140561608015472) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][133], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][133].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][133].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][134], accessed_by=ListGetItemGuardAccessor(134) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][134], 140561606584144) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][134], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][134].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][134].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][135], accessed_by=ListGetItemGuardAccessor(135) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][135], 140561606575904) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][135], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][135].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][135].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][136], accessed_by=ListGetItemGuardAccessor(136) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][136], 140561608015232) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][136], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][136].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][136].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][137], accessed_by=ListGetItemGuardAccessor(137) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][137], 140561608014832) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][137], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[2304], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][137].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][137].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][138], accessed_by=ListGetItemGuardAccessor(138) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][138], 140561608014992) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][138], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][138].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][138].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][139], accessed_by=ListGetItemGuardAccessor(139) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][139], 140561608014912) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][139], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][139].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][139].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][140], accessed_by=ListGetItemGuardAccessor(140) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][140], 140561608015632) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][140], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][140].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][140].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][141], accessed_by=ListGetItemGuardAccessor(141) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][141], 140561608015552) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][141], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][141].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][141].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][142], accessed_by=ListGetItemGuardAccessor(142) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][142], 140561608014752) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][142], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072, 768], stride=[768, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][142].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][142].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][143], accessed_by=ListGetItemGuardAccessor(143) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][143], 140561608014432) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][143], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[3072], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: 
source=L['self'].param_groups[0]['params'][143].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][143].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][144], accessed_by=ListGetItemGuardAccessor(144) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][144], 140561608015152) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][144], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768, 3072], stride=[3072, 1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][144].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][144].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][145], accessed_by=ListGetItemGuardAccessor(145) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][145], 140561608014512) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][145], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][145].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][145].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][146], accessed_by=ListGetItemGuardAccessor(146) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][146], 140561606585344) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][146], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][146].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][146].grad is not None + | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][147], accessed_by=ListGetItemGuardAccessor(147) + | | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['params'][147], 140561606574384) + | | | | | | +- TENSOR_MATCH: check_tensor(L['self'].param_groups[0]['params'][147], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[768], stride=[1]) + | | | | | | +- NO_TENSOR_ALIASING + | | | | | | +- GuardManager: source=L['self'].param_groups[0]['params'][147].grad, accessed_by=GradGuardAccessor(grad) + | | | | | | | +- NOT_NONE: L['self'].param_groups[0]['params'][147].grad is not None + | | | | +- GuardManager: source=L['self'].param_groups[0]['lr'], accessed_by=DictGetItemGuardAccessor(lr) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['lr'] == 0.01 + | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'], accessed_by=DictGetItemGuardAccessor(betas) + | | | | | +- TYPE_MATCH: ___check_type_id(L['self'].param_groups[0]['betas'], 94206128741824) + | | | | | +- LENGTH_CHECK: len(L['self'].param_groups[0]['betas']) == 2 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'][0], 
accessed_by=TupleGetItemGuardAccessor(0) + | | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['betas'][0] == 0.9 + | | | | | +- GuardManager: source=L['self'].param_groups[0]['betas'][1], accessed_by=TupleGetItemGuardAccessor(1) + | | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['betas'][1] == 0.999 + | | | | +- GuardManager: source=L['self'].param_groups[0]['eps'], accessed_by=DictGetItemGuardAccessor(eps) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['eps'] == 1e-08 + | | | | +- GuardManager: source=L['self'].param_groups[0]['weight_decay'], accessed_by=DictGetItemGuardAccessor(weight_decay) + | | | | | +- EQUALS_MATCH: L['self'].param_groups[0]['weight_decay'] == 0 + | | | | +- GuardManager: source=L['self'].param_groups[0]['amsgrad'], accessed_by=DictGetItemGuardAccessor(amsgrad) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['amsgrad'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['maximize'], accessed_by=DictGetItemGuardAccessor(maximize) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['maximize'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['foreach'], accessed_by=DictGetItemGuardAccessor(foreach) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['foreach'], 94206128801408) + | | | | +- GuardManager: source=L['self'].param_groups[0]['capturable'], accessed_by=DictGetItemGuardAccessor(capturable) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['capturable'], 94206128801408) + | | | | +- GuardManager: source=L['self'].param_groups[0]['differentiable'], accessed_by=DictGetItemGuardAccessor(differentiable) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['differentiable'], 94206128801376) + | | | | +- GuardManager: source=L['self'].param_groups[0]['fused'], accessed_by=DictGetItemGuardAccessor(fused) + | | | | | +- ID_MATCH: ___check_obj_id(L['self'].param_groups[0]['fused'], 94206128752608) + | +- GuardManager: source=L['closure'], accessed_by=DictGetItemGuardAccessor(closure) + | | +- ID_MATCH: ___check_obj_id(L['closure'], 94206128752608) + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['adam'], accessed_by=DictGetItemGuardAccessor(adam) + | | | +- GuardManager: source=G['adam'].__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__code__, 140563175561632) + | | | +- GuardManager: source=G['adam'].__closure__, accessed_by=GetAttrGuardAccessor(__closure__) + | | | | +- GuardManager: source=G['adam'].__closure__[0], accessed_by=TupleGetItemGuardAccessor(0) + | | | | | +- GuardManager: source=G['adam'].__closure__[0].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[0].cell_contents, 140563175768128) + | | | | +- GuardManager: source=G['adam'].__closure__[1], accessed_by=TupleGetItemGuardAccessor(1) + | | | | | +- GuardManager: source=G['adam'].__closure__[1].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- GuardManager: source=G['adam'].__closure__[1].cell_contents.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[1].cell_contents.__code__, 140563175708560) + | | | | +- GuardManager: source=G['adam'].__closure__[2], accessed_by=TupleGetItemGuardAccessor(2) + | | | | | +- GuardManager: source=G['adam'].__closure__[2].cell_contents, 
accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- ID_MATCH: ___check_obj_id(G['adam'].__closure__[2].cell_contents, 94206128801408) + | | | | +- GuardManager: source=G['adam'].__closure__[3], accessed_by=TupleGetItemGuardAccessor(3) + | | | | | +- GuardManager: source=G['adam'].__closure__[3].cell_contents, accessed_by=GetAttrGuardAccessor(cell_contents) + | | | | | | +- EQUALS_MATCH: G['adam'].__closure__[3].cell_contents == 5 + | | +- GuardManager: source=G['Tensor'], accessed_by=DictGetItemGuardAccessor(Tensor) + | | | +- ID_MATCH: ___check_obj_id(G['Tensor'], 94206193171168) + | | +- GuardManager: source=G['Optimizer'], accessed_by=DictGetItemGuardAccessor(Optimizer) + | | | +- ID_MATCH: ___check_obj_id(G['Optimizer'], 94206202190960) + | | | +- GuardManager: source=G['Optimizer']._group_tensors_by_device_and_dtype, accessed_by=GetAttrGuardAccessor(_group_tensors_by_device_and_dtype) + | | | | +- GuardManager: source=G['Optimizer']._group_tensors_by_device_and_dtype.__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | | +- ID_MATCH: ___check_obj_id(G['Optimizer']._group_tensors_by_device_and_dtype.__code__, 140563175565504) + | | +- GuardManager: source=G['_multi_tensor_adam'], accessed_by=DictGetItemGuardAccessor(_multi_tensor_adam) + | | | +- GuardManager: source=G['_multi_tensor_adam'].__code__, accessed_by=GetAttrGuardAccessor(__code__) + | | | | +- ID_MATCH: ___check_obj_id(G['_multi_tensor_adam'].__code__, 140563175707856) + | | +- GuardManager: source=G['__builtins_dict___23'], accessed_by=DictGetItemGuardAccessor(__builtins_dict___23) + | | | +- GuardManager: source=G['__builtins_dict___23']['len'], accessed_by=DictGetItemGuardAccessor(len) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['len'], 140565189726736) + | | | +- GuardManager: source=G['__builtins_dict___23']['list'], accessed_by=DictGetItemGuardAccessor(list) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['list'], 94206128766016) + | | | +- GuardManager: source=G['__builtins_dict___23']['range'], accessed_by=DictGetItemGuardAccessor(range) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['range'], 94206128748288) + | | | +- GuardManager: source=G['__builtins_dict___23']['getattr'], accessed_by=DictGetItemGuardAccessor(getattr) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['getattr'], 140565189725856) + | | | +- GuardManager: source=G['__builtins_dict___23']['isinstance'], accessed_by=DictGetItemGuardAccessor(isinstance) + | | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___23']['isinstance'], 140565189726416) + | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'], accessed_by=DictGetItemGuardAccessor(__import_torch_dot_optim_dot_optimizer) + | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'], 140563175468672) + | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch, accessed_by=GetAttrGuardAccessor(torch) + | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch, 140565184683664) + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: 
G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.jit, accessed_by=GetAttrGuardAccessor(jit) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.jit, 140563222375024) + | | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.jit.is_scripting, accessed_by=GetAttrGuardAccessor(is_scripting) + | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.jit.is_scripting, 140563303435856) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.compiler, 140562864251632) + | | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch.compiler.is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling) + | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch.compiler.is_compiling, 140562863847696) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_pow, accessed_by=GetAttrGuardAccessor(_foreach_pow) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_pow, 140565181190432) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_add_, accessed_by=GetAttrGuardAccessor(_foreach_add_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_add_, 140565181126176) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_div_, accessed_by=GetAttrGuardAccessor(_foreach_div_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_div_, 140565181078464) + | | | | +- 
GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_mul_, accessed_by=GetAttrGuardAccessor(_foreach_mul_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_mul_, 140565181188672) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_neg_, accessed_by=GetAttrGuardAccessor(_foreach_neg_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_neg_, 140565181128096) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt, accessed_by=GetAttrGuardAccessor(_foreach_sqrt) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt, 140565181128736) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sub_, accessed_by=GetAttrGuardAccessor(_foreach_sub_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sub_, 140565181126336) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_lerp_, accessed_by=GetAttrGuardAccessor(_foreach_lerp_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_lerp_, 140565181079904) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt_, accessed_by=GetAttrGuardAccessor(_foreach_sqrt_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_sqrt_, 140565181128816) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcdiv_, accessed_by=GetAttrGuardAccessor(_foreach_addcdiv_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcdiv_, 140565181189552) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcmul_, accessed_by=GetAttrGuardAccessor(_foreach_addcmul_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_addcmul_, 140565181079024) + | | | | +- GuardManager: source=G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_reciprocal_, accessed_by=GetAttrGuardAccessor(_foreach_reciprocal_) + | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_optim_dot_optimizer'].torch._foreach_reciprocal_, 140565181190752) + | | +- GuardManager: source=G['__optimizer_140561654732528_140560320971456_c14'](), accessed_by=GlobalWeakRefGuardAccessor(__optimizer_140561654732528_140560320971456_c14) + | | | +- NOT_NONE: G['__optimizer_140561654732528_140560320971456_c14']() is not None + | | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor(torch) + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + 
| | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + | | | +- OBJECT_ALIASING: G['__import_torch_dot_optim_dot_optimizer'].torch is G['torch'] + +V0806 13:56:22.656000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "6373403406566b0d07d809832e4636e9"} + { + "name": "entire_frame_compile", + "ts": 1722977782656800.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.656000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "403a651c09acb31ec87103d6cb0ce6c4"} + { + "name": "_compile.compile_inner", + "ts": 1722977782656905.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.657000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "7/0", "frame_key": "10", "co_name": "step", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/adam.py", "co_firstlineno": 197, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 1564, "shape_env_guard_count": 0, "graph_op_count": 461, "graph_node_count": 1202, "graph_input_count": 740, "start_time": 1722977765.99917, "entire_frame_compile_time_s": 16.65777063369751, "backend_compile_time_s": 15.009882926940918, "inductor_compile_time_s": 12.278924465179443, "code_gen_time_s": 8.707001686096191, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true}, "frame_id": 7, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.661000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "933b77f235a58b3669385c39e9b5c847"} + { + "name": "cudagraphify", + "ts": 1722977782661519.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.661000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a4d8ac42e95374728a2395c2b83481b7"} + { + "name": "cudagraphify", + "ts": 1722977782661842.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.669000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 
2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", "filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 2556, "name": "optimizer_step", "filename": 2}, {"line": 478, "name": "wrapper", "filename": 4}, {"line": 478, "name": "torch_dynamo_resume_in_wrapper_at_478", "filename": 4}]}, "frame_id": 8, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.669000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "be3193b9ffb67bac58a91607b0886088"} + { + "name": "_compile.compile_inner", + "ts": 1722977782669412.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.669000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f6f07a6528d283fa873344e0ed477c20"} + { + "name": "entire_frame_compile", + "ts": 1722977782669488.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "a463cd5fab0e85ef4cdb2d49ec071c30"} + { + "name": "entire_frame_compile", + "ts": 1722977782672002.0, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "0b15ffeae50ebb8d1de380fc9eddce48"} + { + "name": "_compile.compile_inner", + "ts": 1722977782672101.5, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "8/0", "frame_key": "11", "co_name": "torch_dynamo_resume_in_wrapper_at_478", "co_filename": "/data/users/jjwu/a/pytorch/torch/optim/optimizer.py", "co_firstlineno": 478, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "start_time": 1722977782.6693876, "entire_frame_compile_time_s": null, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0028078556060791016, "has_guarded_code": false}, "frame_id": 8, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.672000 4107173 torch/_dynamo/convert_frame.py:869] {"dynamo_start": {"stack": [{"line": 460, "name": "", "filename": 1}, {"line": 456, "name": "torchbench_main", "filename": 1}, {"line": 4086, "name": "main", "filename": 2}, {"line": 4018, "name": "process_entry", "filename": 2}, {"line": 4680, "name": "run", "filename": 2}, {"line": 3378, "name": "run_one_model", "filename": 2}, {"line": 3209, "name": "run_performance_test", "filename": 2}, {"line": 3141, "name": "warmup", "filename": 2}, {"line": 464, "name": "_fn", "filename": 3}, {"line": 437, "name": "forward_and_backward_pass", "filename": 1}, {"line": 438, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_437", "filename": 1}, {"line": 444, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_438", 
"filename": 1}, {"line": 445, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_444", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_445", "filename": 1}, {"line": 446, "name": "torch_dynamo_resume_in_forward_and_backward_pass_at_446", "filename": 1}]}, "frame_id": 9, "frame_compile_id": 0, "attempt": 0} +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "70a9b0f3c76c037192abe119fdeef60f"} + { + "name": "_compile.compile_inner", + "ts": 1722977782672775.2, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.672000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "ccea687f80d2b7fe110135062e930fa4"} + { + "name": "entire_frame_compile", + "ts": 1722977782672841.5, + "args": null, + "ph": "B", + "pid": 0 + } +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 0, "describer_id": 316, "size": 201216}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 0, "ndim": 3, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 1, 50304], "requires_grad": true, "stride": [50304, 50304, 1], "storage": 0, "view_func": "", "describer_id": 316}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.675000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 316, "id": 0, "source": "L['pred']"}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:204] {"describe_storage": {"id": 1, "describer_id": 316, "size": 4}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:410] {"describe_tensor": {"id": 1, "ndim": 0, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [], "requires_grad": true, "stride": [], "storage": 1, "view_func": "", "describer_id": 316}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.677000 4107173 torch/_subclasses/meta_utils.py:1633] {"describe_source": {"describer_id": 316, "id": 1, "source": "L['loss']"}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} +V0806 13:56:22.679000 4107173 torch/_dynamo/guards.py:2195] {"dynamo_cpp_guards_str": {}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1, "has_payload": "fb4ecbe50cbd7aa2ddaad613c4ad31e3"} + + TREE_GUARD_MANAGER: + +- RootGuardManager + | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:462 in init_ambient_guards + | +- GLOBAL_STATE: ___check_global_state() + | +- GuardManager: source=L['mod'], accessed_by=DictGetItemGuardAccessor(mod) + | | +- TYPE_MATCH: ___check_type_id(L['mod'], 94206531299328) + | +- GuardManager: source=L['loss'], accessed_by=DictGetItemGuardAccessor(loss) + | | +- TENSOR_MATCH: check_tensor(L['loss'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[], stride=[]) + | | +- NO_HASATTR: hasattr(L['loss'], '_dynamo_dynamic_indices') == False + | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['loss'], L['pred']) + | +- GuardManager: source=L['pred'], accessed_by=DictGetItemGuardAccessor(pred) + | | +- TENSOR_MATCH: check_tensor(L['pred'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[1, 1, 50304], stride=[50304, 50304, 1]) 
+ | | +- NO_HASATTR: hasattr(L['pred'], '_dynamo_dynamic_indices') == False + | | +- NO_TENSOR_ALIASING + | +- GuardManager: source=L['cloned_inputs'], accessed_by=DictGetItemGuardAccessor(cloned_inputs) + | | +- TYPE_MATCH: ___check_type_id(L['cloned_inputs'], 94206128766016) + | | +- LENGTH_CHECK: len(L['cloned_inputs']) == 1 + | +- GuardManager: source=L['collect_outputs'], accessed_by=DictGetItemGuardAccessor(collect_outputs) + | | +- ID_MATCH: ___check_obj_id(L['collect_outputs'], 94206128801408) + | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor + | | +- GuardManager: source=G['collect_results'], accessed_by=DictGetItemGuardAccessor(collect_results) + | | | +- ID_MATCH: ___check_obj_id(G['collect_results'], 140561699517296) + +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "f291c5d56f337d7413c8577515cab788"} + { + "name": "entire_frame_compile", + "ts": 1722977782679682.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:824] {"chromium_event": {}, "has_payload": "186863a8eb64744b41d4349598d565d8"} + { + "name": "_compile.compile_inner", + "ts": 1722977782679751.8, + "args": null, + "ph": "E", + "pid": 0 + } +V0806 13:56:22.679000 4107173 torch/_dynamo/utils.py:779] {"compilation_metrics": {"compile_id": "9/0", "frame_key": "13", "co_name": "torch_dynamo_resume_in_forward_and_backward_pass_at_446", "co_filename": "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", "co_firstlineno": 446, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 11, "shape_env_guard_count": 0, "graph_op_count": 0, "graph_node_count": 2, "graph_input_count": 2, "start_time": 1722977782.672749, "entire_frame_compile_time_s": 0.007033109664916992, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function collect_results in file /data/users/jjwu/a/pytorch/torch/_dynamo/testing.py'"], "dynamo_time_before_restart_s": 0.0009343624114990234, "has_guarded_code": true}, "frame_id": 9, "frame_compile_id": 0, "attempt": 1} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 74f19ea..4a3b4d4 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -128,3 +128,27 @@ fn test_parse_artifact() { ); } } + +#[test] +fn test_parse_chromium_event() { + let expected_files = ["chromium_events.json", "index.html"]; + // Read the test file. + // chromium_nanogpt_cache_miss.log was generated by running the torchbench + // nanogpt training benchmark with structured trace logging enabled, roughly: + // TORCH_TRACE=~/trace_logs/test python benchmarks/dynamo/torchbench.py ... + let path = Path::new("tests/inputs/chromium_nanogpt_cache_miss.log").to_path_buf(); + let config = tlparse::ParseConfig { + strict: true, + ..Default::default() + }; + let output = tlparse::parse_path(&path, config); + assert!(output.is_ok()); + let map: HashMap<PathBuf, String> = output.unwrap().into_iter().collect(); + // Check all expected output files are present + for prefix in expected_files { + assert!( + prefix_exists(&map, prefix), + "{} not found in output", + prefix + ); + } +}
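
Note on the new chromium_events.json build product: each "chromium_event" record in the trace above carries a JSON payload in the Chrome Trace Event Format — a "name", a "ts" timestamp in microseconds, a phase "ph" ("B" opens a slice, "E" closes the matching one), and a "pid". Producing something Perfetto can open is then essentially a matter of collecting those payloads into one JSON array, since the Trace Event Format accepts a bare array of event objects. A minimal sketch of that assembly step, assuming serde_json and anyhow as dependencies (assemble_chromium_events is an illustrative name, not this crate's actual internals):

    use serde_json::Value;

    /// Collect the per-line "chromium_event" JSON payloads into a single
    /// JSON array that Perfetto / chrome://tracing can load directly.
    fn assemble_chromium_events(payloads: &[&str]) -> anyhow::Result<String> {
        let events: Vec<Value> = payloads
            .iter()
            .map(|p| serde_json::from_str(p).map_err(anyhow::Error::from))
            .collect::<anyhow::Result<_>>()?;
        // Paired "B"/"E" events with the same name render as one span on the
        // timeline, so the begin/end pairs emitted around each compile phase
        // (e.g. entire_frame_compile above) show up as visible durations.
        Ok(serde_json::to_string_pretty(&events)?)
    }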